From b164d8ee536dba526f9da2083433d529daf7b37b Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 29 Mar 2019 17:01:17 -0400 Subject: [PATCH 01/29] Implement a new Core Scheduler --- src/core/hle/kernel/scheduler.cpp | 455 +++++++++++++++++++----------- src/core/hle/kernel/scheduler.h | 234 +++++++-------- 2 files changed, 421 insertions(+), 268 deletions(-) diff --git a/src/core/hle/kernel/scheduler.cpp b/src/core/hle/kernel/scheduler.cpp index e8447b69a..878aeed6d 100644 --- a/src/core/hle/kernel/scheduler.cpp +++ b/src/core/hle/kernel/scheduler.cpp @@ -3,6 +3,8 @@ // Refer to the license.txt file included. #include +#include +#include #include #include "common/assert.h" @@ -17,57 +19,286 @@ namespace Kernel { -std::mutex Scheduler::scheduler_mutex; +void GlobalScheduler::AddThread(SharedPtr thread) { + thread_list.push_back(std::move(thread)); +} -Scheduler::Scheduler(Core::System& system, Core::ARM_Interface& cpu_core) - : cpu_core{cpu_core}, system{system} {} +void GlobalScheduler::RemoveThread(Thread* thread) { + thread_list.erase(std::remove(thread_list.begin(), thread_list.end(), thread), + thread_list.end()); +} -Scheduler::~Scheduler() { - for (auto& thread : thread_list) { - thread->Stop(); +/* + * SelectThreads, Yield functions originally by TuxSH. + * licensed under GPLv2 or later under exception provided by the author. + */ + +void GlobalScheduler::UnloadThread(s32 core) { + Scheduler& sched = Core::System::GetInstance().Scheduler(core); + sched.UnloadThread(); +} + +void GlobalScheduler::SelectThread(u32 core) { + auto update_thread = [](Thread* thread, Scheduler& sched) { + if (thread != sched.selected_thread) { + if (thread == nullptr) { + ++sched.idle_selection_count; + } + sched.selected_thread = thread; + } + sched.context_switch_pending = sched.selected_thread != sched.current_thread; + std::atomic_thread_fence(std::memory_order_seq_cst); + }; + Scheduler& sched = Core::System::GetInstance().Scheduler(core); + Thread* current_thread = nullptr; + current_thread = scheduled_queue[core].empty() ? 
nullptr : scheduled_queue[core].front(); + if (!current_thread) { + Thread* winner = nullptr; + std::set sug_cores; + for (auto thread : suggested_queue[core]) { + s32 this_core = thread->GetProcessorID(); + Thread* thread_on_core = nullptr; + if (this_core >= 0) { + thread_on_core = scheduled_queue[this_core].front(); + } + if (this_core < 0 || thread != thread_on_core) { + winner = thread; + break; + } + sug_cores.insert(this_core); + } + if (winner && winner->GetPriority() > 2) { + if (winner->IsRunning()) { + UnloadThread(winner->GetProcessorID()); + } + TransferToCore(winner->GetPriority(), core, winner); + current_thread = winner; + } else { + for (auto& src_core : sug_cores) { + auto it = scheduled_queue[src_core].begin(); + it++; + if (it != scheduled_queue[src_core].end()) { + Thread* thread_on_core = scheduled_queue[src_core].front(); + Thread* to_change = *it; + if (thread_on_core->IsRunning() || to_change->IsRunning()) { + UnloadThread(src_core); + } + TransferToCore(thread_on_core->GetPriority(), core, thread_on_core); + current_thread = thread_on_core; + } + } + } + } + update_thread(current_thread, sched); +} + +void GlobalScheduler::SelectThreads() { + auto update_thread = [](Thread* thread, Scheduler& sched) { + if (thread != sched.selected_thread) { + if (thread == nullptr) { + ++sched.idle_selection_count; + } + sched.selected_thread = thread; + } + sched.context_switch_pending = sched.selected_thread != sched.current_thread; + std::atomic_thread_fence(std::memory_order_seq_cst); + }; + + auto& system = Core::System::GetInstance(); + + std::unordered_set picked_threads; + // This maintain the "current thread is on front of queue" invariant + std::array current_threads; + for (u32 i = 0; i < NUM_CPU_CORES; i++) { + Scheduler& sched = system.Scheduler(i); + current_threads[i] = scheduled_queue[i].empty() ? nullptr : scheduled_queue[i].front(); + if (current_threads[i]) + picked_threads.insert(current_threads[i]); + update_thread(current_threads[i], sched); + } + + // Do some load-balancing. Allow second pass. + std::array current_threads_2 = current_threads; + for (u32 i = 0; i < NUM_CPU_CORES; i++) { + if (!scheduled_queue[i].empty()) { + continue; + } + Thread* winner = nullptr; + for (auto thread : suggested_queue[i]) { + if (thread->GetProcessorID() < 0 || thread != current_threads[i]) { + if (picked_threads.count(thread) == 0 && !thread->IsRunning()) { + winner = thread; + break; + } + } + } + if (winner) { + TransferToCore(winner->GetPriority(), i, winner); + current_threads_2[i] = winner; + picked_threads.insert(winner); + } + } + + // See which to-be-current threads have changed & update accordingly + for (u32 i = 0; i < NUM_CPU_CORES; i++) { + Scheduler& sched = system.Scheduler(i); + if (current_threads_2[i] != current_threads[i]) { + update_thread(current_threads_2[i], sched); + } + } + + reselection_pending.store(false, std::memory_order_release); +} + +void GlobalScheduler::YieldThread(Thread* yielding_thread) { + // Note: caller should use critical section, etc. 
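+ // (This is the 0-ns sleep yield: the caller rotates to the back of its priority
+ // bucket on its own core, and the new front of that bucket becomes the winner.)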
+ u32 core_id = static_cast(yielding_thread->GetProcessorID()); + u32 priority = yielding_thread->GetPriority(); + + // Yield the thread + ASSERT_MSG(yielding_thread == scheduled_queue[core_id].front(priority), + "Thread yielding without being in front"); + scheduled_queue[core_id].yield(priority); + + Thread* winner = scheduled_queue[core_id].front(priority); + AskForReselectionOrMarkRedundant(yielding_thread, winner); +} + +void GlobalScheduler::YieldThreadAndBalanceLoad(Thread* yielding_thread) { + // Note: caller should check if !thread.IsSchedulerOperationRedundant and use critical section, + // etc. + u32 core_id = static_cast(yielding_thread->GetProcessorID()); + u32 priority = yielding_thread->GetPriority(); + + // Yield the thread + ASSERT_MSG(yielding_thread == scheduled_queue[core_id].front(priority), + "Thread yielding without being in front"); + scheduled_queue[core_id].yield(priority); + + std::array current_threads; + for (u32 i = 0; i < NUM_CPU_CORES; i++) { + current_threads[i] = scheduled_queue[i].empty() ? nullptr : scheduled_queue[i].front(); + } + + Thread* next_thread = scheduled_queue[core_id].front(priority); + Thread* winner = nullptr; + for (auto& thread : suggested_queue[core_id]) { + s32 source_core = thread->GetProcessorID(); + if (source_core >= 0) { + if (current_threads[source_core] != nullptr) { + if (thread == current_threads[source_core] || + current_threads[source_core]->GetPriority() < min_regular_priority) + continue; + } + if (next_thread->GetLastRunningTicks() >= thread->GetLastRunningTicks() || + next_thread->GetPriority() < thread->GetPriority()) { + if (thread->GetPriority() <= priority) { + winner = thread; + break; + } + } + } + } + + if (winner != nullptr) { + if (winner != yielding_thread) { + if (winner->IsRunning()) + UnloadThread(winner->GetProcessorID()); + TransferToCore(winner->GetPriority(), core_id, winner); + } + } else { + winner = next_thread; + } + + AskForReselectionOrMarkRedundant(yielding_thread, winner); +} + +void GlobalScheduler::YieldThreadAndWaitForLoadBalancing(Thread* yielding_thread) { + // Note: caller should check if !thread.IsSchedulerOperationRedundant and use critical section, + // etc. + Thread* winner = nullptr; + u32 core_id = static_cast(yielding_thread->GetProcessorID()); + + // Remove the thread from its scheduled mlq, put it on the corresponding "suggested" one instead + TransferToCore(yielding_thread->GetPriority(), -1, yielding_thread); + + // If the core is idle, perform load balancing, excluding the threads that have just used this + // function... + if (scheduled_queue[core_id].empty()) { + // Here, "current_threads" is calculated after the ""yield"", unlike yield -1 + std::array current_threads; + for (u32 i = 0; i < NUM_CPU_CORES; i++) { + current_threads[i] = scheduled_queue[i].empty() ? 
nullptr : scheduled_queue[i].front(); + } + for (auto& thread : suggested_queue[core_id]) { + s32 source_core = thread->GetProcessorID(); + if (source_core < 0 || thread == current_threads[source_core]) + continue; + if (current_threads[source_core] == nullptr || + current_threads[source_core]->GetPriority() >= min_regular_priority) { + winner = thread; + } + break; + } + if (winner != nullptr) { + if (winner != yielding_thread) { + if (winner->IsRunning()) + UnloadThread(winner->GetProcessorID()); + TransferToCore(winner->GetPriority(), core_id, winner); + } + } else { + winner = yielding_thread; + } + } + + AskForReselectionOrMarkRedundant(yielding_thread, winner); +} + +void GlobalScheduler::AskForReselectionOrMarkRedundant(Thread* current_thread, Thread* winner) { + if (current_thread == winner) { + // Nintendo (not us) has a nullderef bug on current_thread->owner, but which is never + // triggered. + // current_thread->SetRedundantSchedulerOperation(); + } else { + reselection_pending.store(true, std::memory_order_release); } } +GlobalScheduler::~GlobalScheduler() = default; + +Scheduler::Scheduler(Core::System& system, Core::ARM_Interface& cpu_core, u32 id) + : system(system), cpu_core(cpu_core), id(id) {} + +Scheduler::~Scheduler() {} + bool Scheduler::HaveReadyThreads() const { - std::lock_guard lock{scheduler_mutex}; - return !ready_queue.empty(); + return system.GlobalScheduler().HaveReadyThreads(id); } Thread* Scheduler::GetCurrentThread() const { return current_thread.get(); } +Thread* Scheduler::GetSelectedThread() const { + return selected_thread.get(); +} + +void Scheduler::SelectThreads() { + system.GlobalScheduler().SelectThread(id); +} + u64 Scheduler::GetLastContextSwitchTicks() const { return last_context_switch_time; } -Thread* Scheduler::PopNextReadyThread() { - Thread* next = nullptr; - Thread* thread = GetCurrentThread(); - - if (thread && thread->GetStatus() == ThreadStatus::Running) { - if (ready_queue.empty()) { - return thread; - } - // We have to do better than the current thread. - // This call returns null when that's not possible. - next = ready_queue.front(); - if (next == nullptr || next->GetPriority() >= thread->GetPriority()) { - next = thread; - } - } else { - if (ready_queue.empty()) { - return nullptr; - } - next = ready_queue.front(); - } - - return next; +void Scheduler::TryDoContextSwitch() { + if (context_switch_pending) + SwitchContext(); } -void Scheduler::SwitchContext(Thread* new_thread) { - Thread* previous_thread = GetCurrentThread(); - Process* const previous_process = system.Kernel().CurrentProcess(); +void Scheduler::UnloadThread() { + Thread* const previous_thread = GetCurrentThread(); + Process* const previous_process = Core::CurrentProcess(); UpdateLastContextSwitchTime(previous_thread, previous_process); @@ -80,23 +311,51 @@ void Scheduler::SwitchContext(Thread* new_thread) { if (previous_thread->GetStatus() == ThreadStatus::Running) { // This is only the case when a reschedule is triggered without the current thread // yielding execution (i.e. 
an event triggered, system core time-sliced, etc) - ready_queue.add(previous_thread, previous_thread->GetPriority(), false); previous_thread->SetStatus(ThreadStatus::Ready); } + previous_thread->SetIsRunning(false); + } + current_thread = nullptr; +} + +void Scheduler::SwitchContext() { + Thread* const previous_thread = GetCurrentThread(); + Thread* const new_thread = GetSelectedThread(); + + context_switch_pending = false; + if (new_thread == previous_thread) + return; + + Process* const previous_process = Core::CurrentProcess(); + + UpdateLastContextSwitchTime(previous_thread, previous_process); + + // Save context for previous thread + if (previous_thread) { + cpu_core.SaveContext(previous_thread->GetContext()); + // Save the TPIDR_EL0 system register in case it was modified. + previous_thread->SetTPIDR_EL0(cpu_core.GetTPIDR_EL0()); + + if (previous_thread->GetStatus() == ThreadStatus::Running) { + // This is only the case when a reschedule is triggered without the current thread + // yielding execution (i.e. an event triggered, system core time-sliced, etc) + previous_thread->SetStatus(ThreadStatus::Ready); + } + previous_thread->SetIsRunning(false); } // Load context of new thread if (new_thread) { + ASSERT_MSG(new_thread->GetProcessorID() == this->id, + "Thread must be assigned to this core."); ASSERT_MSG(new_thread->GetStatus() == ThreadStatus::Ready, "Thread must be ready to become running."); // Cancel any outstanding wakeup events for this thread new_thread->CancelWakeupTimer(); - current_thread = new_thread; - - ready_queue.remove(new_thread, new_thread->GetPriority()); new_thread->SetStatus(ThreadStatus::Running); + new_thread->SetIsRunning(true); auto* const thread_owner_process = current_thread->GetOwnerProcess(); if (previous_process != thread_owner_process) { @@ -116,7 +375,7 @@ void Scheduler::SwitchContext(Thread* new_thread) { void Scheduler::UpdateLastContextSwitchTime(Thread* thread, Process* process) { const u64 prev_switch_ticks = last_context_switch_time; - const u64 most_recent_switch_ticks = system.CoreTiming().GetTicks(); + const u64 most_recent_switch_ticks = Core::System::GetInstance().CoreTiming().GetTicks(); const u64 update_ticks = most_recent_switch_ticks - prev_switch_ticks; if (thread != nullptr) { @@ -130,124 +389,4 @@ void Scheduler::UpdateLastContextSwitchTime(Thread* thread, Process* process) { last_context_switch_time = most_recent_switch_ticks; } -void Scheduler::Reschedule() { - std::lock_guard lock{scheduler_mutex}; - - Thread* cur = GetCurrentThread(); - Thread* next = PopNextReadyThread(); - - if (cur && next) { - LOG_TRACE(Kernel, "context switch {} -> {}", cur->GetObjectId(), next->GetObjectId()); - } else if (cur) { - LOG_TRACE(Kernel, "context switch {} -> idle", cur->GetObjectId()); - } else if (next) { - LOG_TRACE(Kernel, "context switch idle -> {}", next->GetObjectId()); - } - - SwitchContext(next); -} - -void Scheduler::AddThread(SharedPtr thread) { - std::lock_guard lock{scheduler_mutex}; - - thread_list.push_back(std::move(thread)); -} - -void Scheduler::RemoveThread(Thread* thread) { - std::lock_guard lock{scheduler_mutex}; - - thread_list.erase(std::remove(thread_list.begin(), thread_list.end(), thread), - thread_list.end()); -} - -void Scheduler::ScheduleThread(Thread* thread, u32 priority) { - std::lock_guard lock{scheduler_mutex}; - - ASSERT(thread->GetStatus() == ThreadStatus::Ready); - ready_queue.add(thread, priority); -} - -void Scheduler::UnscheduleThread(Thread* thread, u32 priority) { - std::lock_guard 
lock{scheduler_mutex}; - - ASSERT(thread->GetStatus() == ThreadStatus::Ready); - ready_queue.remove(thread, priority); -} - -void Scheduler::SetThreadPriority(Thread* thread, u32 priority) { - std::lock_guard lock{scheduler_mutex}; - if (thread->GetPriority() == priority) { - return; - } - - // If thread was ready, adjust queues - if (thread->GetStatus() == ThreadStatus::Ready) - ready_queue.adjust(thread, thread->GetPriority(), priority); -} - -Thread* Scheduler::GetNextSuggestedThread(u32 core, u32 maximum_priority) const { - std::lock_guard lock{scheduler_mutex}; - - const u32 mask = 1U << core; - for (auto* thread : ready_queue) { - if ((thread->GetAffinityMask() & mask) != 0 && thread->GetPriority() < maximum_priority) { - return thread; - } - } - return nullptr; -} - -void Scheduler::YieldWithoutLoadBalancing(Thread* thread) { - ASSERT(thread != nullptr); - // Avoid yielding if the thread isn't even running. - ASSERT(thread->GetStatus() == ThreadStatus::Running); - - // Sanity check that the priority is valid - ASSERT(thread->GetPriority() < THREADPRIO_COUNT); - - // Yield this thread -- sleep for zero time and force reschedule to different thread - GetCurrentThread()->Sleep(0); -} - -void Scheduler::YieldWithLoadBalancing(Thread* thread) { - ASSERT(thread != nullptr); - const auto priority = thread->GetPriority(); - const auto core = static_cast(thread->GetProcessorID()); - - // Avoid yielding if the thread isn't even running. - ASSERT(thread->GetStatus() == ThreadStatus::Running); - - // Sanity check that the priority is valid - ASSERT(priority < THREADPRIO_COUNT); - - // Sleep for zero time to be able to force reschedule to different thread - GetCurrentThread()->Sleep(0); - - Thread* suggested_thread = nullptr; - - // Search through all of the cpu cores (except this one) for a suggested thread. - // Take the first non-nullptr one - for (unsigned cur_core = 0; cur_core < Core::NUM_CPU_CORES; ++cur_core) { - const auto res = - system.CpuCore(cur_core).Scheduler().GetNextSuggestedThread(core, priority); - - // If scheduler provides a suggested thread - if (res != nullptr) { - // And its better than the current suggested thread (or is the first valid one) - if (suggested_thread == nullptr || - suggested_thread->GetPriority() > res->GetPriority()) { - suggested_thread = res; - } - } - } - - // If a suggested thread was found, queue that for this core - if (suggested_thread != nullptr) - suggested_thread->ChangeCore(core, suggested_thread->GetAffinityMask()); -} - -void Scheduler::YieldAndWaitForLoadBalancing(Thread* thread) { - UNIMPLEMENTED_MSG("Wait for load balancing thread yield type is not implemented!"); -} - } // namespace Kernel diff --git a/src/core/hle/kernel/scheduler.h b/src/core/hle/kernel/scheduler.h index b29bf7be8..50fa7376b 100644 --- a/src/core/hle/kernel/scheduler.h +++ b/src/core/hle/kernel/scheduler.h @@ -20,124 +20,141 @@ namespace Kernel { class Process; -class Scheduler final { +class GlobalScheduler final { public: - explicit Scheduler(Core::System& system, Core::ARM_Interface& cpu_core); - ~Scheduler(); - - /// Returns whether there are any threads that are ready to run. - bool HaveReadyThreads() const; - - /// Reschedules to the next available thread (call after current thread is suspended) - void Reschedule(); - - /// Gets the current running thread - Thread* GetCurrentThread() const; - - /// Gets the timestamp for the last context switch in ticks. 
- u64 GetLastContextSwitchTicks() const; + static constexpr u32 NUM_CPU_CORES = 4; + GlobalScheduler() { + reselection_pending = false; + } + ~GlobalScheduler(); /// Adds a new thread to the scheduler void AddThread(SharedPtr thread); /// Removes a thread from the scheduler void RemoveThread(Thread* thread); - /// Schedules a thread that has become "ready" - void ScheduleThread(Thread* thread, u32 priority); - - /// Unschedules a thread that was already scheduled - void UnscheduleThread(Thread* thread, u32 priority); - - /// Sets the priority of a thread in the scheduler - void SetThreadPriority(Thread* thread, u32 priority); - - /// Gets the next suggested thread for load balancing - Thread* GetNextSuggestedThread(u32 core, u32 minimum_priority) const; - - /** - * YieldWithoutLoadBalancing -- analogous to normal yield on a system - * Moves the thread to the end of the ready queue for its priority, and then reschedules the - * system to the new head of the queue. - * - * Example (Single Core -- but can be extrapolated to multi): - * ready_queue[prio=0]: ThreadA, ThreadB, ThreadC (->exec order->) - * Currently Running: ThreadR - * - * ThreadR calls YieldWithoutLoadBalancing - * - * ThreadR is moved to the end of ready_queue[prio=0]: - * ready_queue[prio=0]: ThreadA, ThreadB, ThreadC, ThreadR (->exec order->) - * Currently Running: Nothing - * - * System is rescheduled (ThreadA is popped off of queue): - * ready_queue[prio=0]: ThreadB, ThreadC, ThreadR (->exec order->) - * Currently Running: ThreadA - * - * If the queue is empty at time of call, no yielding occurs. This does not cross between cores - * or priorities at all. - */ - void YieldWithoutLoadBalancing(Thread* thread); - - /** - * YieldWithLoadBalancing -- yield but with better selection of the new running thread - * Moves the current thread to the end of the ready queue for its priority, then selects a - * 'suggested thread' (a thread on a different core that could run on this core) from the - * scheduler, changes its core, and reschedules the current core to that thread. - * - * Example (Dual Core -- can be extrapolated to Quad Core, this is just normal yield if it were - * single core): - * ready_queue[core=0][prio=0]: ThreadA, ThreadB (affinities not pictured as irrelevant - * ready_queue[core=1][prio=0]: ThreadC[affinity=both], ThreadD[affinity=core1only] - * Currently Running: ThreadQ on Core 0 || ThreadP on Core 1 - * - * ThreadQ calls YieldWithLoadBalancing - * - * ThreadQ is moved to the end of ready_queue[core=0][prio=0]: - * ready_queue[core=0][prio=0]: ThreadA, ThreadB - * ready_queue[core=1][prio=0]: ThreadC[affinity=both], ThreadD[affinity=core1only] - * Currently Running: ThreadQ on Core 0 || ThreadP on Core 1 - * - * A list of suggested threads for each core is compiled - * Suggested Threads: {ThreadC on Core 1} - * If this were quad core (as the switch is), there could be between 0 and 3 threads in this - * list. If there are more than one, the thread is selected by highest prio. - * - * ThreadC is core changed to Core 0: - * ready_queue[core=0][prio=0]: ThreadC, ThreadA, ThreadB, ThreadQ - * ready_queue[core=1][prio=0]: ThreadD - * Currently Running: None on Core 0 || ThreadP on Core 1 - * - * System is rescheduled (ThreadC is popped off of queue): - * ready_queue[core=0][prio=0]: ThreadA, ThreadB, ThreadQ - * ready_queue[core=1][prio=0]: ThreadD - * Currently Running: ThreadC on Core 0 || ThreadP on Core 1 - * - * If no suggested threads can be found this will behave just as normal yield. 
If there are - * multiple candidates for the suggested thread on a core, the highest prio is taken. - */ - void YieldWithLoadBalancing(Thread* thread); - - /// Currently unknown -- asserts as unimplemented on call - void YieldAndWaitForLoadBalancing(Thread* thread); - /// Returns a list of all threads managed by the scheduler const std::vector>& GetThreadList() const { return thread_list; } -private: - /** - * Pops and returns the next thread from the thread queue - * @return A pointer to the next ready thread - */ - Thread* PopNextReadyThread(); + void Suggest(u32 priority, u32 core, Thread* thread) { + suggested_queue[core].add(thread, priority); + } + void Unsuggest(u32 priority, u32 core, Thread* thread) { + suggested_queue[core].remove(thread, priority); + } + + void Schedule(u32 priority, u32 core, Thread* thread) { + ASSERT_MSG(thread->GetProcessorID() == core, + "Thread must be assigned to this core."); + scheduled_queue[core].add(thread, priority); + } + + void SchedulePrepend(u32 priority, u32 core, Thread* thread) { + ASSERT_MSG(thread->GetProcessorID() == core, + "Thread must be assigned to this core."); + scheduled_queue[core].add(thread, priority, false); + } + + void Reschedule(u32 priority, u32 core, Thread* thread) { + scheduled_queue[core].remove(thread, priority); + scheduled_queue[core].add(thread, priority); + } + + void Unschedule(u32 priority, u32 core, Thread* thread) { + scheduled_queue[core].remove(thread, priority); + } + + void TransferToCore(u32 priority, s32 destination_core, Thread* thread) { + bool schedulable = thread->GetPriority() < THREADPRIO_COUNT; + s32 source_core = thread->GetProcessorID(); + if (source_core == destination_core || !schedulable) + return; + thread->SetProcessorID(destination_core); + if (source_core >= 0) + Unschedule(priority, source_core, thread); + if (destination_core >= 0) { + Unsuggest(priority, destination_core, thread); + Schedule(priority, destination_core, thread); + } + if (source_core >= 0) + Suggest(priority, source_core, thread); + } + + void UnloadThread(s32 core); + + void SelectThreads(); + void SelectThread(u32 core); + + bool HaveReadyThreads(u32 core_id) { + return !scheduled_queue[core_id].empty(); + } + + void YieldThread(Thread* thread); + void YieldThreadAndBalanceLoad(Thread* thread); + void YieldThreadAndWaitForLoadBalancing(Thread* thread); + + u32 CpuCoresCount() const { + return NUM_CPU_CORES; + } + + void SetReselectionPending() { + reselection_pending.store(true, std::memory_order_release); + } + + bool IsReselectionPending() { + return reselection_pending.load(std::memory_order_acquire); + } + +private: + void AskForReselectionOrMarkRedundant(Thread* current_thread, Thread* winner); + + static constexpr u32 min_regular_priority = 2; + std::array, NUM_CPU_CORES> scheduled_queue; + std::array, NUM_CPU_CORES> suggested_queue; + std::atomic reselection_pending; + + /// Lists all thread ids that aren't deleted/etc. + std::vector> thread_list; +}; + +class Scheduler final { +public: + explicit Scheduler(Core::System& system, Core::ARM_Interface& cpu_core, const u32 id); + ~Scheduler(); + + /// Returns whether there are any threads that are ready to run. 
+ bool HaveReadyThreads() const; + + /// Reschedules to the next available thread (call after current thread is suspended) + void TryDoContextSwitch(); + + void UnloadThread(); + + void SelectThreads(); + + /// Gets the current running thread + Thread* GetCurrentThread() const; + + Thread* GetSelectedThread() const; + + /// Gets the timestamp for the last context switch in ticks. + u64 GetLastContextSwitchTicks() const; + + bool ContextSwitchPending() const { + return context_switch_pending; + } + +private: + friend class GlobalScheduler; /** * Switches the CPU's active thread context to that of the specified thread * @param new_thread The thread to switch to */ - void SwitchContext(Thread* new_thread); + void SwitchContext(); /** * Called on every context switch to update the internal timestamp @@ -152,19 +169,16 @@ private: */ void UpdateLastContextSwitchTime(Thread* thread, Process* process); - /// Lists all thread ids that aren't deleted/etc. - std::vector> thread_list; - - /// Lists only ready thread ids. - Common::MultiLevelQueue ready_queue; - SharedPtr current_thread = nullptr; - - Core::ARM_Interface& cpu_core; - u64 last_context_switch_time = 0; + SharedPtr selected_thread = nullptr; Core::System& system; - static std::mutex scheduler_mutex; + Core::ARM_Interface& cpu_core; + u64 last_context_switch_time = 0; + u64 idle_selection_count = 0; + const u32 id; + + bool context_switch_pending = false; }; } // namespace Kernel From a1ac0c6cb47e10863b0bfbb1a6aadc71ccc513ab Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 29 Mar 2019 17:01:46 -0400 Subject: [PATCH 02/29] Addapt thread class to the new Scheduler --- src/core/hle/kernel/thread.cpp | 242 +++++++++++++++++++++++++-------- src/core/hle/kernel/thread.h | 55 +++++++- 2 files changed, 237 insertions(+), 60 deletions(-) diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp index ec529e7f2..d0fa7b370 100644 --- a/src/core/hle/kernel/thread.cpp +++ b/src/core/hle/kernel/thread.cpp @@ -45,15 +45,7 @@ void Thread::Stop() { callback_handle); kernel.ThreadWakeupCallbackHandleTable().Close(callback_handle); callback_handle = 0; - - // Clean up thread from ready queue - // This is only needed when the thread is terminated forcefully (SVC TerminateProcess) - if (status == ThreadStatus::Ready || status == ThreadStatus::Paused) { - scheduler->UnscheduleThread(this, current_priority); - } - - status = ThreadStatus::Dead; - + SetStatus(ThreadStatus::Dead); WakeupAllWaitingThreads(); // Clean up any dangling references in objects that this thread was waiting for @@ -132,13 +124,11 @@ void Thread::ResumeFromWait() { wakeup_callback = nullptr; if (activity == ThreadActivity::Paused) { - status = ThreadStatus::Paused; + SetStatus(ThreadStatus::Paused); return; } - status = ThreadStatus::Ready; - - ChangeScheduler(); + SetStatus(ThreadStatus::Ready); } void Thread::CancelWait() { @@ -205,9 +195,9 @@ ResultVal> Thread::Create(KernelCore& kernel, std::string name thread->name = std::move(name); thread->callback_handle = kernel.ThreadWakeupCallbackHandleTable().Create(thread).Unwrap(); thread->owner_process = &owner_process; + auto& scheduler = kernel.GlobalScheduler(); + scheduler.AddThread(thread); thread->tls_address = thread->owner_process->CreateTLSRegion(); - thread->scheduler = &system.Scheduler(processor_id); - thread->scheduler->AddThread(thread); thread->owner_process->RegisterThread(thread.get()); @@ -250,6 +240,22 @@ void Thread::SetStatus(ThreadStatus new_status) { return; } + switch (new_status) { + case 
ThreadStatus::Ready: + case ThreadStatus::Running: + SetSchedulingStatus(ThreadSchedStatus::Runnable); + break; + case ThreadStatus::Dormant: + SetSchedulingStatus(ThreadSchedStatus::None); + break; + case ThreadStatus::Dead: + SetSchedulingStatus(ThreadSchedStatus::Exited); + break; + default: + SetSchedulingStatus(ThreadSchedStatus::Paused); + break; + } + if (status == ThreadStatus::Running) { last_running_ticks = Core::System::GetInstance().CoreTiming().GetTicks(); } @@ -311,8 +317,7 @@ void Thread::UpdatePriority() { return; } - scheduler->SetThreadPriority(this, new_priority); - current_priority = new_priority; + SetCurrentPriority(new_priority); if (!lock_owner) { return; @@ -328,47 +333,7 @@ void Thread::UpdatePriority() { } void Thread::ChangeCore(u32 core, u64 mask) { - ideal_core = core; - affinity_mask = mask; - ChangeScheduler(); -} - -void Thread::ChangeScheduler() { - if (status != ThreadStatus::Ready) { - return; - } - - auto& system = Core::System::GetInstance(); - std::optional new_processor_id{GetNextProcessorId(affinity_mask)}; - - if (!new_processor_id) { - new_processor_id = processor_id; - } - if (ideal_core != -1 && system.Scheduler(ideal_core).GetCurrentThread() == nullptr) { - new_processor_id = ideal_core; - } - - ASSERT(*new_processor_id < 4); - - // Add thread to new core's scheduler - auto& next_scheduler = system.Scheduler(*new_processor_id); - - if (*new_processor_id != processor_id) { - // Remove thread from previous core's scheduler - scheduler->RemoveThread(this); - next_scheduler.AddThread(this); - } - - processor_id = *new_processor_id; - - // If the thread was ready, unschedule from the previous core and schedule on the new core - scheduler->UnscheduleThread(this, current_priority); - next_scheduler.ScheduleThread(this, current_priority); - - // Change thread's scheduler - scheduler = &next_scheduler; - - system.CpuCore(processor_id).PrepareReschedule(); + SetCoreAndAffinityMask(core, mask); } bool Thread::AllWaitObjectsReady() const { @@ -391,7 +356,7 @@ void Thread::SetActivity(ThreadActivity value) { if (status == ThreadStatus::Ready) { status = ThreadStatus::Paused; } else if (status == ThreadStatus::Running) { - status = ThreadStatus::Paused; + SetStatus(ThreadStatus::Paused); Core::System::GetInstance().CpuCore(processor_id).PrepareReschedule(); } } else if (status == ThreadStatus::Paused) { @@ -408,6 +373,165 @@ void Thread::Sleep(s64 nanoseconds) { WakeAfterDelay(nanoseconds); } +void Thread::YieldType0() { + auto& scheduler = kernel.GlobalScheduler(); + scheduler.YieldThread(this); +} + +void Thread::YieldType1() { + auto& scheduler = kernel.GlobalScheduler(); + scheduler.YieldThreadAndBalanceLoad(this); +} + +void Thread::YieldType2() { + auto& scheduler = kernel.GlobalScheduler(); + scheduler.YieldThreadAndWaitForLoadBalancing(this); +} + +void Thread::SetSchedulingStatus(ThreadSchedStatus new_status) { + u32 old_flags = scheduling_state; + scheduling_state = + (scheduling_state & ThreadSchedMasks::HighMask) | static_cast(new_status); + AdjustSchedulingOnStatus(old_flags); +} + +void Thread::SetCurrentPriority(u32 new_priority) { + u32 old_priority = current_priority; + current_priority = new_priority; + AdjustSchedulingOnPriority(old_priority); +} + +ResultCode Thread::SetCoreAndAffinityMask(s32 new_core, u64 new_affinity_mask) { + auto HighestSetCore = [](u64 mask, u32 max_cores) { + for (s32 core = max_cores - 1; core >= 0; core--) { + if (((mask >> core) & 1) != 0) + return core; + } + return -1; + }; + bool use_override = 
affinity_override_count != 0; + // The value -3 is "do not change the ideal core". + if (new_core == -3) { + new_core = use_override ? ideal_core_override : ideal_core; + if ((new_affinity_mask & (1 << new_core)) == 0) { + return ERR_INVALID_COMBINATION; + } + } + if (use_override) { + ideal_core_override = new_core; + affinity_mask_override = new_affinity_mask; + } else { + u64 old_affinity_mask = affinity_mask; + ideal_core = new_core; + affinity_mask = new_affinity_mask; + if (old_affinity_mask != new_affinity_mask) { + s32 old_core = processor_id; + if (processor_id >= 0 && ((affinity_mask >> processor_id) & 1) == 0) { + if (ideal_core < 0) { + processor_id = HighestSetCore(affinity_mask, GlobalScheduler::NUM_CPU_CORES); + } else { + processor_id = ideal_core; + } + } + AdjustSchedulingOnAffinity(old_affinity_mask, old_core); + } + } + return RESULT_SUCCESS; +} + +void Thread::AdjustSchedulingOnStatus(u32 old_flags) { + if (old_flags == scheduling_state) + return; + + auto& scheduler = kernel.GlobalScheduler(); + if (static_cast(old_flags & ThreadSchedMasks::LowMask) == + ThreadSchedStatus::Runnable) { + // In this case the thread was running, now it's pausing/exitting + if (processor_id >= 0) + scheduler.Unschedule(current_priority, processor_id, this); + + for (s32 core = 0; core < GlobalScheduler::NUM_CPU_CORES; core++) { + if (core != processor_id && ((affinity_mask >> core) & 1) != 0) + scheduler.Unsuggest(current_priority, core, this); + } + } else if (GetSchedulingStatus() == ThreadSchedStatus::Runnable) { + // The thread is now set to running from being stopped + if (processor_id >= 0) + scheduler.Schedule(current_priority, processor_id, this); + + for (s32 core = 0; core < GlobalScheduler::NUM_CPU_CORES; core++) { + if (core != processor_id && ((affinity_mask >> core) & 1) != 0) + scheduler.Suggest(current_priority, core, this); + } + } + + scheduler.SetReselectionPending(); +} + +void Thread::AdjustSchedulingOnPriority(u32 old_priority) { + if (GetSchedulingStatus() != ThreadSchedStatus::Runnable) { + return; + } + auto& scheduler = Core::System::GetInstance().GlobalScheduler(); + if (processor_id >= 0) { + scheduler.Unschedule(old_priority, processor_id, this); + } + + for (s32 core = 0; core < GlobalScheduler::NUM_CPU_CORES; core++) { + if (core != processor_id && ((affinity_mask >> core) & 1) != 0) { + scheduler.Unsuggest(old_priority, core, this); + } + } + + // Add thread to the new priority queues. 
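+ // If this is the thread currently running on its core, prepend it so the
+ // "current thread is at the front of the queue" invariant is preserved.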
+ Thread* current_thread = GetCurrentThread(); + + if (processor_id >= 0) { + if (current_thread == this) { + scheduler.SchedulePrepend(current_priority, processor_id, this); + } else { + scheduler.Schedule(current_priority, processor_id, this); + } + } + + for (s32 core = 0; core < GlobalScheduler::NUM_CPU_CORES; core++) { + if (core != processor_id && ((affinity_mask >> core) & 1) != 0) { + scheduler.Suggest(current_priority, core, this); + } + } + + scheduler.SetReselectionPending(); +} + +void Thread::AdjustSchedulingOnAffinity(u64 old_affinity_mask, s32 old_core) { + auto& scheduler = Core::System::GetInstance().GlobalScheduler(); + if (GetSchedulingStatus() != ThreadSchedStatus::Runnable || + current_priority >= THREADPRIO_COUNT) + return; + + for (s32 core = 0; core < GlobalScheduler::NUM_CPU_CORES; core++) { + if (((old_affinity_mask >> core) & 1) != 0) { + if (core == old_core) { + scheduler.Unschedule(current_priority, core, this); + } else { + scheduler.Unsuggest(current_priority, core, this); + } + } + } + + for (s32 core = 0; core < GlobalScheduler::NUM_CPU_CORES; core++) { + if (((affinity_mask >> core) & 1) != 0) { + if (core == processor_id) { + scheduler.Schedule(current_priority, core, this); + } else { + scheduler.Suggest(current_priority, core, this); + } + } + } + + scheduler.SetReselectionPending(); +} + //////////////////////////////////////////////////////////////////////////////////////////////////// /** diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h index 07e989637..c426a7209 100644 --- a/src/core/hle/kernel/thread.h +++ b/src/core/hle/kernel/thread.h @@ -75,6 +75,21 @@ enum class ThreadActivity : u32 { Paused = 1, }; +enum class ThreadSchedStatus : u32 { None = 0, Paused = 1, Runnable = 2, Exited = 3 }; + +enum ThreadSchedFlags : u32 { + ProcessPauseFlag = 1 << 4, + ThreadPauseFlag = 1 << 5, + ProcessDebugPauseFlag = 1 << 6, + KernelInitPauseFlag = 1 << 8, +}; + +enum ThreadSchedMasks : u32 { + LowMask = 0x000f, + HighMask = 0xfff0, + ForcePauseMask = 0x0070, +}; + class Thread final : public WaitObject { public: using MutexWaitingThreads = std::vector>; @@ -278,6 +293,10 @@ public: return processor_id; } + void SetProcessorID(s32 new_core) { + processor_id = new_core; + } + Process* GetOwnerProcess() { return owner_process; } @@ -383,11 +402,38 @@ public: /// Sleeps this thread for the given amount of nanoseconds. void Sleep(s64 nanoseconds); + /// Yields this thread without rebalancing loads. + void YieldType0(); + + /// Yields this thread and does a load rebalancing. 
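+ /// (the svcSleepThread yield-with-load-balancing case)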
+ void YieldType1(); + + /// Yields this thread and if the core is left idle, loads are rebalanced + void YieldType2(); + + ThreadSchedStatus GetSchedulingStatus() { + return static_cast(scheduling_state & ThreadSchedMasks::LowMask); + } + + bool IsRunning() const { + return is_running; + } + + void SetIsRunning(bool value) { + is_running = value; + } + private: explicit Thread(KernelCore& kernel); ~Thread() override; - void ChangeScheduler(); + void SetSchedulingStatus(ThreadSchedStatus new_status); + void SetCurrentPriority(u32 new_priority); + ResultCode SetCoreAndAffinityMask(s32 new_core, u64 new_affinity_mask); + + void AdjustSchedulingOnStatus(u32 old_flags); + void AdjustSchedulingOnPriority(u32 old_priority); + void AdjustSchedulingOnAffinity(u64 old_affinity_mask, s32 old_core); Core::ARM_Interface::ThreadContext context{}; @@ -453,6 +499,13 @@ private: ThreadActivity activity = ThreadActivity::Normal; + s32 ideal_core_override = -1; + u64 affinity_mask_override = 0x1; + u32 affinity_override_count = 0; + + u32 scheduling_state = 0; + bool is_running = false; + std::string name; }; From 57a71f899a95ccaa2984c1cb35c083221a29fd6e Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 29 Mar 2019 17:02:57 -0400 Subject: [PATCH 03/29] Add interfacing to the Global Scheduler --- src/core/core.cpp | 10 ++++++++++ src/core/core.h | 7 +++++++ src/core/hle/kernel/kernel.cpp | 10 ++++++++++ src/core/hle/kernel/kernel.h | 7 +++++++ 4 files changed, 34 insertions(+) diff --git a/src/core/core.cpp b/src/core/core.cpp index 4d0ac72a5..5565840fd 100644 --- a/src/core/core.cpp +++ b/src/core/core.cpp @@ -444,6 +444,16 @@ const Kernel::Scheduler& System::Scheduler(std::size_t core_index) const { return CpuCore(core_index).Scheduler(); } +/// Gets the global scheduler +Kernel::GlobalScheduler& System::GlobalScheduler() { + return impl->kernel.GlobalScheduler(); +} + +/// Gets the global scheduler +const Kernel::GlobalScheduler& System::GlobalScheduler() const { + return impl->kernel.GlobalScheduler(); +} + Kernel::Process* System::CurrentProcess() { return impl->kernel.CurrentProcess(); } diff --git a/src/core/core.h b/src/core/core.h index 90e7ac607..2a002f6d7 100644 --- a/src/core/core.h +++ b/src/core/core.h @@ -27,6 +27,7 @@ namespace Kernel { class KernelCore; class Process; class Scheduler; +class GlobalScheduler; } // namespace Kernel namespace Loader { @@ -238,6 +239,12 @@ public: /// Gets the scheduler for the CPU core with the specified index const Kernel::Scheduler& Scheduler(std::size_t core_index) const; + /// Gets the global scheduler + Kernel::GlobalScheduler& GlobalScheduler(); + + /// Gets the global scheduler + const Kernel::GlobalScheduler& GlobalScheduler() const; + /// Provides a pointer to the current process Kernel::Process* CurrentProcess(); diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp index 799e5e0d8..b4fd1d3f3 100644 --- a/src/core/hle/kernel/kernel.cpp +++ b/src/core/hle/kernel/kernel.cpp @@ -18,6 +18,7 @@ #include "core/hle/kernel/kernel.h" #include "core/hle/kernel/process.h" #include "core/hle/kernel/resource_limit.h" +#include "core/hle/kernel/scheduler.h" #include "core/hle/kernel/thread.h" #include "core/hle/lock.h" #include "core/hle/result.h" @@ -140,6 +141,7 @@ struct KernelCore::Impl { // Lists all processes that exist in the current session. 
std::vector> process_list; Process* current_process = nullptr; + Kernel::GlobalScheduler global_scheduler; SharedPtr system_resource_limit; @@ -203,6 +205,14 @@ const std::vector>& KernelCore::GetProcessList() const { return impl->process_list; } +Kernel::GlobalScheduler& KernelCore::GlobalScheduler() { + return impl->global_scheduler; +} + +const Kernel::GlobalScheduler& KernelCore::GlobalScheduler() const { + return impl->global_scheduler; +} + void KernelCore::AddNamedPort(std::string name, SharedPtr port) { impl->named_ports.emplace(std::move(name), std::move(port)); } diff --git a/src/core/hle/kernel/kernel.h b/src/core/hle/kernel/kernel.h index 0cc44ee76..f9f5bdc88 100644 --- a/src/core/hle/kernel/kernel.h +++ b/src/core/hle/kernel/kernel.h @@ -25,6 +25,7 @@ class HandleTable; class Process; class ResourceLimit; class Thread; +class GlobalScheduler; /// Represents a single instance of the kernel. class KernelCore { @@ -75,6 +76,12 @@ public: /// Retrieves the list of processes. const std::vector>& GetProcessList() const; + /// Gets the sole instance of the global scheduler + Kernel::GlobalScheduler& GlobalScheduler(); + + /// Gets the sole instance of the global scheduler + const Kernel::GlobalScheduler& GlobalScheduler() const; + /// Adds a port to the named port table void AddNamedPort(std::string name, SharedPtr port); From 47c6c78c031b33af877a64aa1da2705558ab02c2 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 29 Mar 2019 17:09:10 -0400 Subject: [PATCH 04/29] Redesign CPU Cores to work with the new scheduler --- src/core/core_cpu.cpp | 23 ++++++++++------------- src/core/core_cpu.h | 2 ++ 2 files changed, 12 insertions(+), 13 deletions(-) diff --git a/src/core/core_cpu.cpp b/src/core/core_cpu.cpp index 6bd9639c6..2a7c3af24 100644 --- a/src/core/core_cpu.cpp +++ b/src/core/core_cpu.cpp @@ -52,7 +52,8 @@ bool CpuBarrier::Rendezvous() { Cpu::Cpu(System& system, ExclusiveMonitor& exclusive_monitor, CpuBarrier& cpu_barrier, std::size_t core_index) - : cpu_barrier{cpu_barrier}, core_timing{system.CoreTiming()}, core_index{core_index} { + : cpu_barrier{cpu_barrier}, global_scheduler{system.GlobalScheduler()}, + core_timing{system.CoreTiming()}, core_index{core_index} { #ifdef ARCHITECTURE_x86_64 arm_interface = std::make_unique(system, exclusive_monitor, core_index); #else @@ -60,7 +61,7 @@ Cpu::Cpu(System& system, ExclusiveMonitor& exclusive_monitor, CpuBarrier& cpu_ba LOG_WARNING(Core, "CPU JIT requested, but Dynarmic not available"); #endif - scheduler = std::make_unique(system, *arm_interface); + scheduler = std::make_unique(system, *arm_interface, core_index); } Cpu::~Cpu() = default; @@ -81,21 +82,21 @@ void Cpu::RunLoop(bool tight_loop) { return; } + Reschedule(); + // If we don't have a currently active thread then don't execute instructions, // instead advance to the next event and try to yield to the next thread if (Kernel::GetCurrentThread() == nullptr) { LOG_TRACE(Core, "Core-{} idling", core_index); core_timing.Idle(); - core_timing.Advance(); - PrepareReschedule(); } else { if (tight_loop) { arm_interface->Run(); } else { arm_interface->Step(); } - core_timing.Advance(); } + core_timing.Advance(); Reschedule(); } @@ -106,18 +107,14 @@ void Cpu::SingleStep() { void Cpu::PrepareReschedule() { arm_interface->PrepareReschedule(); - reschedule_pending = true; } void Cpu::Reschedule() { - if (!reschedule_pending) { - return; - } - - reschedule_pending = false; // Lock the global kernel mutex when we manipulate the HLE state - std::lock_guard lock{HLE::g_hle_lock}; - 
scheduler->Reschedule(); + std::lock_guard lock(HLE::g_hle_lock); + + global_scheduler.SelectThread(core_index); + scheduler->TryDoContextSwitch(); } } // namespace Core diff --git a/src/core/core_cpu.h b/src/core/core_cpu.h index 7589beb8c..5dde2994c 100644 --- a/src/core/core_cpu.h +++ b/src/core/core_cpu.h @@ -13,6 +13,7 @@ namespace Kernel { class Scheduler; +class GlobalScheduler; } namespace Core { @@ -90,6 +91,7 @@ private: std::unique_ptr arm_interface; CpuBarrier& cpu_barrier; + Kernel::GlobalScheduler& global_scheduler; std::unique_ptr scheduler; Timing::CoreTiming& core_timing; From 9031502974fa25c9c8521ad96ecc8126fcac51c6 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 29 Mar 2019 17:11:25 -0400 Subject: [PATCH 05/29] Correct Supervisor Calls to work with the new scheduler, --- src/core/hle/kernel/svc.cpp | 67 +++++++++++++++++++++++-------------- 1 file changed, 41 insertions(+), 26 deletions(-) diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp index 1fd1a732a..ee1e9f006 100644 --- a/src/core/hle/kernel/svc.cpp +++ b/src/core/hle/kernel/svc.cpp @@ -534,6 +534,8 @@ static ResultCode CancelSynchronization(Core::System& system, Handle thread_hand } thread->CancelWait(); + if (thread->GetProcessorID() >= 0) + Core::System::GetInstance().CpuCore(thread->GetProcessorID()).PrepareReschedule(); return RESULT_SUCCESS; } @@ -1066,6 +1068,9 @@ static ResultCode SetThreadActivity(Core::System& system, Handle handle, u32 act } thread->SetActivity(static_cast(activity)); + + if (thread->GetProcessorID() >= 0) + Core::System::GetInstance().CpuCore(thread->GetProcessorID()).PrepareReschedule(); return RESULT_SUCCESS; } @@ -1147,7 +1152,8 @@ static ResultCode SetThreadPriority(Core::System& system, Handle handle, u32 pri thread->SetPriority(priority); - system.CpuCore(thread->GetProcessorID()).PrepareReschedule(); + if (thread->GetProcessorID() >= 0) + Core::System::GetInstance().CpuCore(thread->GetProcessorID()).PrepareReschedule(); return RESULT_SUCCESS; } @@ -1503,7 +1509,8 @@ static ResultCode CreateThread(Core::System& system, Handle* out_handle, VAddr e thread->SetName( fmt::format("thread[entry_point={:X}, handle={:X}]", entry_point, *new_thread_handle)); - system.CpuCore(thread->GetProcessorID()).PrepareReschedule(); + if (thread->GetProcessorID() >= 0) + Core::System::GetInstance().CpuCore(thread->GetProcessorID()).PrepareReschedule(); return RESULT_SUCCESS; } @@ -1525,7 +1532,10 @@ static ResultCode StartThread(Core::System& system, Handle thread_handle) { thread->ResumeFromWait(); if (thread->GetStatus() == ThreadStatus::Ready) { - system.CpuCore(thread->GetProcessorID()).PrepareReschedule(); + if (thread->GetProcessorID() >= 0) + Core::System::GetInstance().CpuCore(thread->GetProcessorID()).PrepareReschedule(); + else + Core::System::GetInstance().GlobalScheduler().SetReselectionPending(); } return RESULT_SUCCESS; @@ -1537,7 +1547,7 @@ static void ExitThread(Core::System& system) { auto* const current_thread = system.CurrentScheduler().GetCurrentThread(); current_thread->Stop(); - system.CurrentScheduler().RemoveThread(current_thread); + system.GlobalScheduler().RemoveThread(current_thread); system.PrepareReschedule(); } @@ -1557,13 +1567,13 @@ static void SleepThread(Core::System& system, s64 nanoseconds) { if (nanoseconds <= 0) { switch (static_cast(nanoseconds)) { case SleepType::YieldWithoutLoadBalancing: - scheduler.YieldWithoutLoadBalancing(current_thread); + current_thread->YieldType0(); break; case SleepType::YieldWithLoadBalancing: - 
scheduler.YieldWithLoadBalancing(current_thread); + current_thread->YieldType1(); break; case SleepType::YieldAndWaitForLoadBalancing: - scheduler.YieldAndWaitForLoadBalancing(current_thread); + current_thread->YieldType2(); break; default: UNREACHABLE_MSG("Unimplemented sleep yield type '{:016X}'!", nanoseconds); @@ -1632,24 +1642,16 @@ static ResultCode SignalProcessWideKey(Core::System& system, VAddr condition_var LOG_TRACE(Kernel_SVC, "called, condition_variable_addr=0x{:X}, target=0x{:08X}", condition_variable_addr, target); - const auto RetrieveWaitingThreads = [&system](std::size_t core_index, - std::vector>& waiting_threads, - VAddr condvar_addr) { - const auto& scheduler = system.Scheduler(core_index); - const auto& thread_list = scheduler.GetThreadList(); - - for (const auto& thread : thread_list) { - if (thread->GetCondVarWaitAddress() == condvar_addr) - waiting_threads.push_back(thread); - } - }; - // Retrieve a list of all threads that are waiting for this condition variable. std::vector> waiting_threads; - RetrieveWaitingThreads(0, waiting_threads, condition_variable_addr); - RetrieveWaitingThreads(1, waiting_threads, condition_variable_addr); - RetrieveWaitingThreads(2, waiting_threads, condition_variable_addr); - RetrieveWaitingThreads(3, waiting_threads, condition_variable_addr); + const auto& scheduler = Core::System::GetInstance().GlobalScheduler(); + const auto& thread_list = scheduler.GetThreadList(); + + for (const auto& thread : thread_list) { + if (thread->GetCondVarWaitAddress() == condition_variable_addr) + waiting_threads.push_back(thread); + } + // Sort them by priority, such that the highest priority ones come first. std::sort(waiting_threads.begin(), waiting_threads.end(), [](const SharedPtr& lhs, const SharedPtr& rhs) { @@ -1704,7 +1706,8 @@ static ResultCode SignalProcessWideKey(Core::System& system, VAddr condition_var thread->SetLockOwner(nullptr); thread->SetMutexWaitAddress(0); thread->SetWaitHandle(0); - system.CpuCore(thread->GetProcessorID()).PrepareReschedule(); + if (thread->GetProcessorID() >= 0) + Core::System::GetInstance().CpuCore(thread->GetProcessorID()).PrepareReschedule(); } else { // Atomically signal that the mutex now has a waiting thread. 
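+ // (exclusive-monitor retry loop: the read-modify-write below restarts if
+ // another core touches the mutex word before the store succeeds)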
do { @@ -1728,6 +1731,8 @@ static ResultCode SignalProcessWideKey(Core::System& system, VAddr condition_var thread->SetStatus(ThreadStatus::WaitMutex); owner->AddMutexWaiter(thread); + if (thread->GetProcessorID() >= 0) + Core::System::GetInstance().CpuCore(thread->GetProcessorID()).PrepareReschedule(); } } @@ -1753,8 +1758,14 @@ static ResultCode WaitForAddress(Core::System& system, VAddr address, u32 type, } const auto arbitration_type = static_cast(type); - auto& address_arbiter = system.Kernel().CurrentProcess()->GetAddressArbiter(); - return address_arbiter.WaitForAddress(address, arbitration_type, value, timeout); + auto& address_arbiter = + system.Kernel().CurrentProcess()->GetAddressArbiter(); + ResultCode result = address_arbiter.WaitForAddress(address, arbitration_type, value, timeout); + if (result == RESULT_SUCCESS) + Core::System::GetInstance() + .CpuCore(GetCurrentThread()->GetProcessorID()) + .PrepareReschedule(); + return result; } // Signals to an address (via Address Arbiter) @@ -2040,7 +2051,10 @@ static ResultCode SetThreadCoreMask(Core::System& system, Handle thread_handle, return ERR_INVALID_HANDLE; } + Core::System::GetInstance().CpuCore(thread->GetProcessorID()).PrepareReschedule(); thread->ChangeCore(core, affinity_mask); + if (thread->GetProcessorID() >= 0) + Core::System::GetInstance().CpuCore(thread->GetProcessorID()).PrepareReschedule(); return RESULT_SUCCESS; } @@ -2151,6 +2165,7 @@ static ResultCode SignalEvent(Core::System& system, Handle handle) { } writable_event->Signal(); + Core::System::GetInstance().PrepareReschedule(); return RESULT_SUCCESS; } From b8b7ebcece955316680a09eb68b891e0acff9fcc Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 29 Mar 2019 17:12:02 -0400 Subject: [PATCH 06/29] Correct compiling errors and addapt to the new interface. 
--- src/core/gdbstub/gdbstub.cpp | 32 +++++++++++++------------------- src/core/hle/kernel/process.cpp | 5 +---- src/yuzu/debugger/wait_tree.cpp | 5 +---- 3 files changed, 15 insertions(+), 27 deletions(-) diff --git a/src/core/gdbstub/gdbstub.cpp b/src/core/gdbstub/gdbstub.cpp index db51d722f..20bb50868 100644 --- a/src/core/gdbstub/gdbstub.cpp +++ b/src/core/gdbstub/gdbstub.cpp @@ -202,13 +202,11 @@ void RegisterModule(std::string name, VAddr beg, VAddr end, bool add_elf_ext) { } static Kernel::Thread* FindThreadById(s64 id) { - for (u32 core = 0; core < Core::NUM_CPU_CORES; core++) { - const auto& threads = Core::System::GetInstance().Scheduler(core).GetThreadList(); - for (auto& thread : threads) { - if (thread->GetThreadID() == static_cast(id)) { - current_core = core; - return thread.get(); - } + const auto& threads = Core::System::GetInstance().GlobalScheduler().GetThreadList(); + for (auto& thread : threads) { + if (thread->GetThreadID() == static_cast(id)) { + current_core = thread->GetProcessorID(); + return thread.get(); } } return nullptr; @@ -647,11 +645,9 @@ static void HandleQuery() { SendReply(buffer.c_str()); } else if (strncmp(query, "fThreadInfo", strlen("fThreadInfo")) == 0) { std::string val = "m"; - for (u32 core = 0; core < Core::NUM_CPU_CORES; core++) { - const auto& threads = Core::System::GetInstance().Scheduler(core).GetThreadList(); - for (const auto& thread : threads) { - val += fmt::format("{:x},", thread->GetThreadID()); - } + const auto& threads = Core::System::GetInstance().GlobalScheduler().GetThreadList(); + for (const auto& thread : threads) { + val += fmt::format("{:x},", thread->GetThreadID()); } val.pop_back(); SendReply(val.c_str()); @@ -661,13 +657,11 @@ static void HandleQuery() { std::string buffer; buffer += "l"; buffer += ""; - for (u32 core = 0; core < Core::NUM_CPU_CORES; core++) { - const auto& threads = Core::System::GetInstance().Scheduler(core).GetThreadList(); - for (const auto& thread : threads) { - buffer += - fmt::format(R"*()*", - thread->GetThreadID(), core, thread->GetThreadID()); - } + const auto& threads = Core::System::GetInstance().GlobalScheduler().GetThreadList(); + for (const auto& thread : threads) { + buffer += + fmt::format(R"*()*", + thread->GetThreadID(), thread->GetProcessorID(), thread->GetThreadID()); } buffer += ""; SendReply(buffer.c_str()); diff --git a/src/core/hle/kernel/process.cpp b/src/core/hle/kernel/process.cpp index e80a12ac3..12a900bcc 100644 --- a/src/core/hle/kernel/process.cpp +++ b/src/core/hle/kernel/process.cpp @@ -213,10 +213,7 @@ void Process::PrepareForTermination() { } }; - stop_threads(system.Scheduler(0).GetThreadList()); - stop_threads(system.Scheduler(1).GetThreadList()); - stop_threads(system.Scheduler(2).GetThreadList()); - stop_threads(system.Scheduler(3).GetThreadList()); + stop_threads(system.GlobalScheduler().GetThreadList()); FreeTLSRegion(tls_region_address); tls_region_address = 0; diff --git a/src/yuzu/debugger/wait_tree.cpp b/src/yuzu/debugger/wait_tree.cpp index cd8180f8b..c5b9aa08f 100644 --- a/src/yuzu/debugger/wait_tree.cpp +++ b/src/yuzu/debugger/wait_tree.cpp @@ -66,10 +66,7 @@ std::vector> WaitTreeItem::MakeThreadItemList() }; const auto& system = Core::System::GetInstance(); - add_threads(system.Scheduler(0).GetThreadList()); - add_threads(system.Scheduler(1).GetThreadList()); - add_threads(system.Scheduler(2).GetThreadList()); - add_threads(system.Scheduler(3).GetThreadList()); + add_threads(system.GlobalScheduler().GetThreadList()); return item_list; } From 
b5d1e447821eb21158669e0ef1d24d630602f1fe Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 29 Mar 2019 17:13:00 -0400 Subject: [PATCH 07/29] Add PrepareReschedule where required. --- src/core/hle/kernel/address_arbiter.cpp | 28 +++++++++++-------------- src/core/hle/kernel/mutex.cpp | 2 ++ src/core/hle/kernel/wait_object.cpp | 4 ++++ 3 files changed, 18 insertions(+), 16 deletions(-) diff --git a/src/core/hle/kernel/address_arbiter.cpp b/src/core/hle/kernel/address_arbiter.cpp index c8842410b..77f7bb451 100644 --- a/src/core/hle/kernel/address_arbiter.cpp +++ b/src/core/hle/kernel/address_arbiter.cpp @@ -22,6 +22,8 @@ namespace Kernel { namespace { // Wake up num_to_wake (or all) threads in a vector. void WakeThreads(const std::vector>& waiting_threads, s32 num_to_wake) { + + auto& system = Core::System::GetInstance(); // Only process up to 'target' threads, unless 'target' is <= 0, in which case process // them all. std::size_t last = waiting_threads.size(); @@ -35,6 +37,8 @@ void WakeThreads(const std::vector>& waiting_threads, s32 num_ waiting_threads[i]->SetWaitSynchronizationResult(RESULT_SUCCESS); waiting_threads[i]->SetArbiterWaitAddress(0); waiting_threads[i]->ResumeFromWait(); + if (waiting_threads[i]->GetProcessorID() >= 0) + system.CpuCore(waiting_threads[i]->GetProcessorID()).PrepareReschedule(); } } } // Anonymous namespace @@ -174,25 +178,17 @@ ResultCode AddressArbiter::WaitForAddressImpl(VAddr address, s64 timeout) { } std::vector> AddressArbiter::GetThreadsWaitingOnAddress(VAddr address) const { - const auto RetrieveWaitingThreads = [this](std::size_t core_index, - std::vector>& waiting_threads, - VAddr arb_addr) { - const auto& scheduler = system.Scheduler(core_index); - const auto& thread_list = scheduler.GetThreadList(); - - for (const auto& thread : thread_list) { - if (thread->GetArbiterWaitAddress() == arb_addr) { - waiting_threads.push_back(thread); - } - } - }; // Retrieve all threads that are waiting for this address. std::vector> threads; - RetrieveWaitingThreads(0, threads, address); - RetrieveWaitingThreads(1, threads, address); - RetrieveWaitingThreads(2, threads, address); - RetrieveWaitingThreads(3, threads, address); + const auto& scheduler = system.GlobalScheduler(); + const auto& thread_list = scheduler.GetThreadList(); + + for (const auto& thread : thread_list) { + if (thread->GetArbiterWaitAddress() == address) { + threads.push_back(thread); + } + } // Sort them by priority, such that the highest priority ones come first. 
std::sort(threads.begin(), threads.end(), diff --git a/src/core/hle/kernel/mutex.cpp b/src/core/hle/kernel/mutex.cpp index 98e87313b..57f2d8bf3 100644 --- a/src/core/hle/kernel/mutex.cpp +++ b/src/core/hle/kernel/mutex.cpp @@ -140,6 +140,8 @@ ResultCode Mutex::Release(VAddr address) { thread->SetMutexWaitAddress(0); thread->SetWaitHandle(0); + Core::System::GetInstance().PrepareReschedule(); + return RESULT_SUCCESS; } } // namespace Kernel diff --git a/src/core/hle/kernel/wait_object.cpp b/src/core/hle/kernel/wait_object.cpp index 0e96ba872..e035a67e9 100644 --- a/src/core/hle/kernel/wait_object.cpp +++ b/src/core/hle/kernel/wait_object.cpp @@ -6,6 +6,8 @@ #include "common/assert.h" #include "common/common_types.h" #include "common/logging/log.h" +#include "core/core.h" +#include "core/core_cpu.h" #include "core/hle/kernel/object.h" #include "core/hle/kernel/process.h" #include "core/hle/kernel/thread.h" @@ -95,6 +97,8 @@ void WaitObject::WakeupWaitingThread(SharedPtr thread) { } if (resume) { thread->ResumeFromWait(); + if (thread->GetProcessorID() >= 0) + Core::System::GetInstance().CpuCore(thread->GetProcessorID()).PrepareReschedule(); } } From 3a94e7ea3386cbd14e74255e0a4c7f8615a396c9 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 2 Apr 2019 08:03:44 -0400 Subject: [PATCH 08/29] Comment and reorganize the scheduler --- src/core/hle/kernel/scheduler.cpp | 170 +++++++++++++----------------- src/core/hle/kernel/scheduler.h | 38 ++++++- 2 files changed, 107 insertions(+), 101 deletions(-) diff --git a/src/core/hle/kernel/scheduler.cpp b/src/core/hle/kernel/scheduler.cpp index 878aeed6d..537640152 100644 --- a/src/core/hle/kernel/scheduler.cpp +++ b/src/core/hle/kernel/scheduler.cpp @@ -19,6 +19,11 @@ namespace Kernel { +/* + * SelectThreads, Yield functions originally by TuxSH. + * licensed under GPLv2 or later under exception provided by the author. + */ + void GlobalScheduler::AddThread(SharedPtr thread) { thread_list.push_back(std::move(thread)); } @@ -29,15 +34,23 @@ void GlobalScheduler::RemoveThread(Thread* thread) { } /* - * SelectThreads, Yield functions originally by TuxSH. - * licensed under GPLv2 or later under exception provided by the author. + * UnloadThread selects a core and forces it to unload its current thread's context */ - void GlobalScheduler::UnloadThread(s32 core) { Scheduler& sched = Core::System::GetInstance().Scheduler(core); sched.UnloadThread(); } +/* + * SelectThread takes care of selecting the new scheduled thread. + * It does it in 3 steps: + * - First a thread is selected from the top of the priority queue. If no thread + * is obtained then we move to step two, else we are done. + * - Second we try to get a suggested thread that's not assigned to any core or + * that is not the top thread in that core. + * - Third is no suggested thread is found, we do a second pass and pick a running + * thread in another core and swap it with its current thread. + */ void GlobalScheduler::SelectThread(u32 core) { auto update_thread = [](Thread* thread, Scheduler& sched) { if (thread != sched.selected_thread) { @@ -51,105 +64,58 @@ void GlobalScheduler::SelectThread(u32 core) { }; Scheduler& sched = Core::System::GetInstance().Scheduler(core); Thread* current_thread = nullptr; + // Step 1: Get top thread in schedule queue. current_thread = scheduled_queue[core].empty() ? 
nullptr : scheduled_queue[core].front(); - if (!current_thread) { - Thread* winner = nullptr; - std::set sug_cores; - for (auto thread : suggested_queue[core]) { - s32 this_core = thread->GetProcessorID(); - Thread* thread_on_core = nullptr; - if (this_core >= 0) { - thread_on_core = scheduled_queue[this_core].front(); - } - if (this_core < 0 || thread != thread_on_core) { - winner = thread; - break; - } - sug_cores.insert(this_core); + if (current_thread) { + update_thread(current_thread, sched); + return; + } + // Step 2: Try selecting a suggested thread. + Thread* winner = nullptr; + std::set sug_cores; + for (auto thread : suggested_queue[core]) { + s32 this_core = thread->GetProcessorID(); + Thread* thread_on_core = nullptr; + if (this_core >= 0) { + thread_on_core = scheduled_queue[this_core].front(); } - if (winner && winner->GetPriority() > 2) { - if (winner->IsRunning()) { - UnloadThread(winner->GetProcessorID()); - } - TransferToCore(winner->GetPriority(), core, winner); - current_thread = winner; - } else { - for (auto& src_core : sug_cores) { - auto it = scheduled_queue[src_core].begin(); - it++; - if (it != scheduled_queue[src_core].end()) { - Thread* thread_on_core = scheduled_queue[src_core].front(); - Thread* to_change = *it; - if (thread_on_core->IsRunning() || to_change->IsRunning()) { - UnloadThread(src_core); - } - TransferToCore(thread_on_core->GetPriority(), core, thread_on_core); - current_thread = thread_on_core; - } + if (this_core < 0 || thread != thread_on_core) { + winner = thread; + break; + } + sug_cores.insert(this_core); + } + // if we got a suggested thread, select it, else do a second pass. + if (winner && winner->GetPriority() > 2) { + if (winner->IsRunning()) { + UnloadThread(winner->GetProcessorID()); + } + TransferToCore(winner->GetPriority(), core, winner); + update_thread(winner, sched); + return; + } + // Step 3: Select a suggested thread from another core + for (auto& src_core : sug_cores) { + auto it = scheduled_queue[src_core].begin(); + it++; + if (it != scheduled_queue[src_core].end()) { + Thread* thread_on_core = scheduled_queue[src_core].front(); + Thread* to_change = *it; + if (thread_on_core->IsRunning() || to_change->IsRunning()) { + UnloadThread(src_core); } + TransferToCore(thread_on_core->GetPriority(), core, thread_on_core); + current_thread = thread_on_core; + break; } } update_thread(current_thread, sched); } -void GlobalScheduler::SelectThreads() { - auto update_thread = [](Thread* thread, Scheduler& sched) { - if (thread != sched.selected_thread) { - if (thread == nullptr) { - ++sched.idle_selection_count; - } - sched.selected_thread = thread; - } - sched.context_switch_pending = sched.selected_thread != sched.current_thread; - std::atomic_thread_fence(std::memory_order_seq_cst); - }; - - auto& system = Core::System::GetInstance(); - - std::unordered_set picked_threads; - // This maintain the "current thread is on front of queue" invariant - std::array current_threads; - for (u32 i = 0; i < NUM_CPU_CORES; i++) { - Scheduler& sched = system.Scheduler(i); - current_threads[i] = scheduled_queue[i].empty() ? nullptr : scheduled_queue[i].front(); - if (current_threads[i]) - picked_threads.insert(current_threads[i]); - update_thread(current_threads[i], sched); - } - - // Do some load-balancing. Allow second pass. 
- std::array<Thread*, NUM_CPU_CORES> current_threads_2 = current_threads; - for (u32 i = 0; i < NUM_CPU_CORES; i++) { - if (!scheduled_queue[i].empty()) { - continue; - } - Thread* winner = nullptr; - for (auto thread : suggested_queue[i]) { - if (thread->GetProcessorID() < 0 || thread != current_threads[i]) { - if (picked_threads.count(thread) == 0 && !thread->IsRunning()) { - winner = thread; - break; - } - } - } - if (winner) { - TransferToCore(winner->GetPriority(), i, winner); - current_threads_2[i] = winner; - picked_threads.insert(winner); - } - } - - // See which to-be-current threads have changed & update accordingly - for (u32 i = 0; i < NUM_CPU_CORES; i++) { - Scheduler& sched = system.Scheduler(i); - if (current_threads_2[i] != current_threads[i]) { - update_thread(current_threads_2[i], sched); - } - } - - reselection_pending.store(false, std::memory_order_release); -} - +/* + * YieldThread takes a thread and moves it to the back of its priority list. + * This operation can be redundant and no scheduling is changed if marked as so. + */ void GlobalScheduler::YieldThread(Thread* yielding_thread) { // Note: caller should use critical section, etc. u32 core_id = static_cast<u32>(yielding_thread->GetProcessorID()); @@ -164,6 +130,12 @@ void GlobalScheduler::YieldThread(Thread* yielding_thread) { AskForReselectionOrMarkRedundant(yielding_thread, winner); } +/* + * YieldThreadAndBalanceLoad takes a thread and moves it to the back of its priority list. + * Afterwards, tries to pick a suggested thread from the suggested queue that has worse time or + * a better priority than the next thread in the core. + * This operation can be redundant and no scheduling is changed if marked as so. + */ void GlobalScheduler::YieldThreadAndBalanceLoad(Thread* yielding_thread) { // Note: caller should check if !thread.IsSchedulerOperationRedundant and use critical section, // etc. @@ -213,6 +185,12 @@ void GlobalScheduler::YieldThreadAndBalanceLoad(Thread* yielding_thread) { AskForReselectionOrMarkRedundant(yielding_thread, winner); } +/* + * YieldThreadAndWaitForLoadBalancing takes a thread and moves it out of the scheduling queue + * and into the suggested queue. If no thread can be scheduled afterwards in that core, + * a suggested thread is obtained instead. + * This operation can be redundant and no scheduling is changed if marked as so. + */ void GlobalScheduler::YieldThreadAndWaitForLoadBalancing(Thread* yielding_thread) { // Note: caller should check if !thread.IsSchedulerOperationRedundant and use critical section, // etc. @@ -256,8 +234,8 @@ void GlobalScheduler::YieldThreadAndWaitForLoadBalancing(Thread void GlobalScheduler::AskForReselectionOrMarkRedundant(Thread* current_thread, Thread* winner) { if (current_thread == winner) { - // Nintendo (not us) has a nullderef bug on current_thread->owner, but which is never - // triggered. + // TODO(blinkhawk): manage redundant operations, this is not implemented. + // as it's mostly an optimization.
// current_thread->SetRedundantSchedulerOperation(); } else { reselection_pending.store(true, std::memory_order_release); diff --git a/src/core/hle/kernel/scheduler.h b/src/core/hle/kernel/scheduler.h index 50fa7376b..82ed64b55 100644 --- a/src/core/hle/kernel/scheduler.h +++ b/src/core/hle/kernel/scheduler.h @@ -48,14 +48,12 @@ public: } void Schedule(u32 priority, u32 core, Thread* thread) { - ASSERT_MSG(thread->GetProcessorID() == core, - "Thread must be assigned to this core."); + ASSERT_MSG(thread->GetProcessorID() == core, "Thread must be assigned to this core."); scheduled_queue[core].add(thread, priority); } void SchedulePrepend(u32 priority, u32 core, Thread* thread) { - ASSERT_MSG(thread->GetProcessorID() == core, - "Thread must be assigned to this core."); + ASSERT_MSG(thread->GetProcessorID() == core, "Thread must be assigned to this core."); scheduled_queue[core].add(thread, priority, false); } @@ -84,17 +82,47 @@ public: Suggest(priority, source_core, thread); } + /* + * UnloadThread selects a core and forces it to unload its current thread's context + */ void UnloadThread(s32 core); - void SelectThreads(); + /* + * SelectThread takes care of selecting the new scheduled thread. + * It does it in 3 steps: + * - First a thread is selected from the top of the priority queue. If no thread + * is obtained then we move to step two, else we are done. + * - Second we try to get a suggested thread that's not assigned to any core or + * that is not the top thread in that core. + * - Third is no suggested thread is found, we do a second pass and pick a running + * thread in another core and swap it with its current thread. + */ void SelectThread(u32 core); bool HaveReadyThreads(u32 core_id) { return !scheduled_queue[core_id].empty(); } + /* + * YieldThread takes a thread and moves it to the back of the it's priority list + * This operation can be redundant and no scheduling is changed if marked as so. + */ void YieldThread(Thread* thread); + + /* + * YieldThreadAndBalanceLoad takes a thread and moves it to the back of the it's priority list. + * Afterwards, tries to pick a suggested thread from the suggested queue that has worse time or + * a better priority than the next thread in the core. + * This operation can be redundant and no scheduling is changed if marked as so. + */ void YieldThreadAndBalanceLoad(Thread* thread); + + /* + * YieldThreadAndWaitForLoadBalancing takes a thread and moves it out of the scheduling queue + * and into the suggested queue. If no thread can be squeduled afterwards in that core, + * a suggested thread is obtained instead. + * This operation can be redundant and no scheduling is changed if marked as so. 
+ */ void YieldThreadAndWaitForLoadBalancing(Thread* thread); u32 CpuCoresCount() const { From fcc6b34fff3c9322a35e6457a699e70585a7e014 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 2 Apr 2019 09:22:53 -0400 Subject: [PATCH 09/29] Correct PrepareReschedule --- src/core/core.cpp | 5 +++ src/core/core.h | 3 ++ src/core/core_cpu.h | 2 +- src/core/hle/kernel/address_arbiter.cpp | 5 +-- src/core/hle/kernel/svc.cpp | 49 +++++++++---------------- src/core/hle/kernel/wait_object.cpp | 3 +- 6 files changed, 29 insertions(+), 38 deletions(-) diff --git a/src/core/core.cpp b/src/core/core.cpp index 5565840fd..4a95630bd 100644 --- a/src/core/core.cpp +++ b/src/core/core.cpp @@ -404,6 +404,11 @@ void System::PrepareReschedule() { CurrentCpuCore().PrepareReschedule(); } +void System::PrepareReschedule(s32 core_index) { + if (core_index >= 0) + CpuCore(core_index).PrepareReschedule(); +} + PerfStatsResults System::GetAndResetPerfStats() { return impl->GetAndResetPerfStats(); } diff --git a/src/core/core.h b/src/core/core.h index 2a002f6d7..0d1008895 100644 --- a/src/core/core.h +++ b/src/core/core.h @@ -185,6 +185,9 @@ public: /// Prepare the core emulation for a reschedule void PrepareReschedule(); + /// Prepare the core emulation for a reschedule + void PrepareReschedule(s32 core_index); + /// Gets and resets core performance statistics PerfStatsResults GetAndResetPerfStats(); diff --git a/src/core/core_cpu.h b/src/core/core_cpu.h index 5dde2994c..0cde54787 100644 --- a/src/core/core_cpu.h +++ b/src/core/core_cpu.h @@ -14,7 +14,7 @@ namespace Kernel { class Scheduler; class GlobalScheduler; -} +} // namespace Kernel namespace Core { class System; diff --git a/src/core/hle/kernel/address_arbiter.cpp b/src/core/hle/kernel/address_arbiter.cpp index 77f7bb451..c66cd16ef 100644 --- a/src/core/hle/kernel/address_arbiter.cpp +++ b/src/core/hle/kernel/address_arbiter.cpp @@ -37,8 +37,7 @@ void WakeThreads(const std::vector>& waiting_threads, s32 num_ waiting_threads[i]->SetWaitSynchronizationResult(RESULT_SUCCESS); waiting_threads[i]->SetArbiterWaitAddress(0); waiting_threads[i]->ResumeFromWait(); - if (waiting_threads[i]->GetProcessorID() >= 0) - system.CpuCore(waiting_threads[i]->GetProcessorID()).PrepareReschedule(); + system.PrepareReschedule(waiting_threads[i]->GetProcessorID()); } } } // Anonymous namespace @@ -173,7 +172,7 @@ ResultCode AddressArbiter::WaitForAddressImpl(VAddr address, s64 timeout) { current_thread->WakeAfterDelay(timeout); - system.CpuCore(current_thread->GetProcessorID()).PrepareReschedule(); + system.PrepareReschedule(current_thread->GetProcessorID()); return RESULT_TIMEOUT; } diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp index ee1e9f006..560ac3945 100644 --- a/src/core/hle/kernel/svc.cpp +++ b/src/core/hle/kernel/svc.cpp @@ -516,7 +516,7 @@ static ResultCode WaitSynchronization(Core::System& system, Handle* index, VAddr thread->WakeAfterDelay(nano_seconds); thread->SetWakeupCallback(DefaultThreadWakeupCallback); - system.CpuCore(thread->GetProcessorID()).PrepareReschedule(); + system.PrepareReschedule(thread->GetProcessorID()); return RESULT_TIMEOUT; } @@ -534,8 +534,7 @@ static ResultCode CancelSynchronization(Core::System& system, Handle thread_hand } thread->CancelWait(); - if (thread->GetProcessorID() >= 0) - Core::System::GetInstance().CpuCore(thread->GetProcessorID()).PrepareReschedule(); + system.PrepareReschedule(thread->GetProcessorID()); return RESULT_SUCCESS; } @@ -1069,8 +1068,7 @@ static ResultCode SetThreadActivity(Core::System& 
system, Handle handle, u32 act thread->SetActivity(static_cast(activity)); - if (thread->GetProcessorID() >= 0) - Core::System::GetInstance().CpuCore(thread->GetProcessorID()).PrepareReschedule(); + system.PrepareReschedule(thread->GetProcessorID()); return RESULT_SUCCESS; } @@ -1152,8 +1150,7 @@ static ResultCode SetThreadPriority(Core::System& system, Handle handle, u32 pri thread->SetPriority(priority); - if (thread->GetProcessorID() >= 0) - Core::System::GetInstance().CpuCore(thread->GetProcessorID()).PrepareReschedule(); + system.PrepareReschedule(thread->GetProcessorID()); return RESULT_SUCCESS; } @@ -1509,8 +1506,7 @@ static ResultCode CreateThread(Core::System& system, Handle* out_handle, VAddr e thread->SetName( fmt::format("thread[entry_point={:X}, handle={:X}]", entry_point, *new_thread_handle)); - if (thread->GetProcessorID() >= 0) - Core::System::GetInstance().CpuCore(thread->GetProcessorID()).PrepareReschedule(); + system.PrepareReschedule(thread->GetProcessorID()); return RESULT_SUCCESS; } @@ -1532,10 +1528,7 @@ static ResultCode StartThread(Core::System& system, Handle thread_handle) { thread->ResumeFromWait(); if (thread->GetStatus() == ThreadStatus::Ready) { - if (thread->GetProcessorID() >= 0) - Core::System::GetInstance().CpuCore(thread->GetProcessorID()).PrepareReschedule(); - else - Core::System::GetInstance().GlobalScheduler().SetReselectionPending(); + system.PrepareReschedule(thread->GetProcessorID()); } return RESULT_SUCCESS; @@ -1582,10 +1575,7 @@ static void SleepThread(Core::System& system, s64 nanoseconds) { current_thread->Sleep(nanoseconds); } - // Reschedule all CPU cores - for (std::size_t i = 0; i < Core::NUM_CPU_CORES; ++i) { - system.CpuCore(i).PrepareReschedule(); - } + system.PrepareReschedule(current_thread->GetProcessorID()); } /// Wait process wide key atomic @@ -1632,7 +1622,7 @@ static ResultCode WaitProcessWideKeyAtomic(Core::System& system, VAddr mutex_add // Note: Deliberately don't attempt to inherit the lock owner's priority. - system.CpuCore(current_thread->GetProcessorID()).PrepareReschedule(); + system.PrepareReschedule(current_thread->GetProcessorID()); return RESULT_SUCCESS; } @@ -1644,7 +1634,7 @@ static ResultCode SignalProcessWideKey(Core::System& system, VAddr condition_var // Retrieve a list of all threads that are waiting for this condition variable. std::vector> waiting_threads; - const auto& scheduler = Core::System::GetInstance().GlobalScheduler(); + const auto& scheduler = system.GlobalScheduler(); const auto& thread_list = scheduler.GetThreadList(); for (const auto& thread : thread_list) { @@ -1706,8 +1696,7 @@ static ResultCode SignalProcessWideKey(Core::System& system, VAddr condition_var thread->SetLockOwner(nullptr); thread->SetMutexWaitAddress(0); thread->SetWaitHandle(0); - if (thread->GetProcessorID() >= 0) - Core::System::GetInstance().CpuCore(thread->GetProcessorID()).PrepareReschedule(); + system.PrepareReschedule(thread->GetProcessorID()); } else { // Atomically signal that the mutex now has a waiting thread. 
do { @@ -1731,8 +1720,7 @@ static ResultCode SignalProcessWideKey(Core::System& system, VAddr condition_var thread->SetStatus(ThreadStatus::WaitMutex); owner->AddMutexWaiter(thread); - if (thread->GetProcessorID() >= 0) - Core::System::GetInstance().CpuCore(thread->GetProcessorID()).PrepareReschedule(); + system.PrepareReschedule(thread->GetProcessorID()); } } @@ -1758,13 +1746,10 @@ static ResultCode WaitForAddress(Core::System& system, VAddr address, u32 type, } const auto arbitration_type = static_cast(type); - auto& address_arbiter = - system.Kernel().CurrentProcess()->GetAddressArbiter(); + auto& address_arbiter = system.Kernel().CurrentProcess()->GetAddressArbiter(); ResultCode result = address_arbiter.WaitForAddress(address, arbitration_type, value, timeout); if (result == RESULT_SUCCESS) - Core::System::GetInstance() - .CpuCore(GetCurrentThread()->GetProcessorID()) - .PrepareReschedule(); + system.PrepareReschedule(); return result; } @@ -2051,10 +2036,10 @@ static ResultCode SetThreadCoreMask(Core::System& system, Handle thread_handle, return ERR_INVALID_HANDLE; } - Core::System::GetInstance().CpuCore(thread->GetProcessorID()).PrepareReschedule(); + system.PrepareReschedule(thread->GetProcessorID()); thread->ChangeCore(core, affinity_mask); - if (thread->GetProcessorID() >= 0) - Core::System::GetInstance().CpuCore(thread->GetProcessorID()).PrepareReschedule(); + system.PrepareReschedule(thread->GetProcessorID()); + return RESULT_SUCCESS; } @@ -2165,7 +2150,7 @@ static ResultCode SignalEvent(Core::System& system, Handle handle) { } writable_event->Signal(); - Core::System::GetInstance().PrepareReschedule(); + system.PrepareReschedule(); return RESULT_SUCCESS; } diff --git a/src/core/hle/kernel/wait_object.cpp b/src/core/hle/kernel/wait_object.cpp index e035a67e9..a65ec7dbc 100644 --- a/src/core/hle/kernel/wait_object.cpp +++ b/src/core/hle/kernel/wait_object.cpp @@ -97,8 +97,7 @@ void WaitObject::WakeupWaitingThread(SharedPtr thread) { } if (resume) { thread->ResumeFromWait(); - if (thread->GetProcessorID() >= 0) - Core::System::GetInstance().CpuCore(thread->GetProcessorID()).PrepareReschedule(); + Core::System::GetInstance().PrepareReschedule(thread->GetProcessorID()); } } From 82218c925af8bcbaa05ae9f39af2d2393de7681f Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Wed, 19 Jun 2019 09:11:18 -0400 Subject: [PATCH 10/29] Kernel: Style and Corrections --- src/core/core.cpp | 5 +- src/core/core.h | 4 +- src/core/core_cpu.cpp | 2 +- src/core/core_cpu.h | 2 +- src/core/hle/kernel/address_arbiter.cpp | 1 - src/core/hle/kernel/kernel.cpp | 2 +- src/core/hle/kernel/mutex.cpp | 2 +- src/core/hle/kernel/scheduler.cpp | 78 +++++++++++++++---------- src/core/hle/kernel/scheduler.h | 53 ++++++++++------- src/core/hle/kernel/svc.cpp | 15 +++-- src/core/hle/kernel/thread.cpp | 54 +++++++++-------- src/core/hle/kernel/thread.h | 15 +++-- 12 files changed, 137 insertions(+), 96 deletions(-) diff --git a/src/core/core.cpp b/src/core/core.cpp index 4a95630bd..d79045eea 100644 --- a/src/core/core.cpp +++ b/src/core/core.cpp @@ -404,9 +404,10 @@ void System::PrepareReschedule() { CurrentCpuCore().PrepareReschedule(); } -void System::PrepareReschedule(s32 core_index) { - if (core_index >= 0) +void System::PrepareReschedule(const u32 core_index) { + if (core_index < GlobalScheduler().CpuCoresCount()) { CpuCore(core_index).PrepareReschedule(); + } } PerfStatsResults System::GetAndResetPerfStats() { diff --git a/src/core/core.h b/src/core/core.h index 0d1008895..984074ce3 100644 --- a/src/core/core.h 
+++ b/src/core/core.h @@ -24,10 +24,10 @@ class VfsFilesystem; } // namespace FileSys namespace Kernel { +class GlobalScheduler; class KernelCore; class Process; class Scheduler; -class GlobalScheduler; } // namespace Kernel namespace Loader { @@ -186,7 +186,7 @@ public: void PrepareReschedule(); /// Prepare the core emulation for a reschedule - void PrepareReschedule(s32 core_index); + void PrepareReschedule(u32 core_index); /// Gets and resets core performance statistics PerfStatsResults GetAndResetPerfStats(); diff --git a/src/core/core_cpu.cpp b/src/core/core_cpu.cpp index 2a7c3af24..a6f63e437 100644 --- a/src/core/core_cpu.cpp +++ b/src/core/core_cpu.cpp @@ -111,7 +111,7 @@ void Cpu::PrepareReschedule() { void Cpu::Reschedule() { // Lock the global kernel mutex when we manipulate the HLE state - std::lock_guard lock(HLE::g_hle_lock); + std::lock_guard lock(HLE::g_hle_lock); global_scheduler.SelectThread(core_index); scheduler->TryDoContextSwitch(); diff --git a/src/core/core_cpu.h b/src/core/core_cpu.h index 0cde54787..80261daf7 100644 --- a/src/core/core_cpu.h +++ b/src/core/core_cpu.h @@ -12,8 +12,8 @@ #include "common/common_types.h" namespace Kernel { -class Scheduler; class GlobalScheduler; +class Scheduler; } // namespace Kernel namespace Core { diff --git a/src/core/hle/kernel/address_arbiter.cpp b/src/core/hle/kernel/address_arbiter.cpp index c66cd16ef..4c1d3fd18 100644 --- a/src/core/hle/kernel/address_arbiter.cpp +++ b/src/core/hle/kernel/address_arbiter.cpp @@ -22,7 +22,6 @@ namespace Kernel { namespace { // Wake up num_to_wake (or all) threads in a vector. void WakeThreads(const std::vector>& waiting_threads, s32 num_to_wake) { - auto& system = Core::System::GetInstance(); // Only process up to 'target' threads, unless 'target' is <= 0, in which case process // them all. diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp index b4fd1d3f3..600d6ec74 100644 --- a/src/core/hle/kernel/kernel.cpp +++ b/src/core/hle/kernel/kernel.cpp @@ -89,7 +89,7 @@ static void ThreadWakeupCallback(u64 thread_handle, [[maybe_unused]] s64 cycles_ } struct KernelCore::Impl { - explicit Impl(Core::System& system) : system{system} {} + explicit Impl(Core::System& system) : system{system}, global_scheduler{system} {} void Initialize(KernelCore& kernel) { Shutdown(); diff --git a/src/core/hle/kernel/mutex.cpp b/src/core/hle/kernel/mutex.cpp index 57f2d8bf3..eb919246c 100644 --- a/src/core/hle/kernel/mutex.cpp +++ b/src/core/hle/kernel/mutex.cpp @@ -140,7 +140,7 @@ ResultCode Mutex::Release(VAddr address) { thread->SetMutexWaitAddress(0); thread->SetWaitHandle(0); - Core::System::GetInstance().PrepareReschedule(); + system.PrepareReschedule(); return RESULT_SUCCESS; } diff --git a/src/core/hle/kernel/scheduler.cpp b/src/core/hle/kernel/scheduler.cpp index 537640152..df4e9b799 100644 --- a/src/core/hle/kernel/scheduler.cpp +++ b/src/core/hle/kernel/scheduler.cpp @@ -1,6 +1,9 @@ // Copyright 2018 yuzu emulator team // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +// +// SelectThreads, Yield functions originally by TuxSH. +// licensed under GPLv2 or later under exception provided by the author. #include #include @@ -19,16 +22,15 @@ namespace Kernel { -/* - * SelectThreads, Yield functions originally by TuxSH. - * licensed under GPLv2 or later under exception provided by the author. 
- */ +GlobalScheduler::GlobalScheduler(Core::System& system) : system{system} { + reselection_pending = false; +} void GlobalScheduler::AddThread(SharedPtr thread) { thread_list.push_back(std::move(thread)); } -void GlobalScheduler::RemoveThread(Thread* thread) { +void GlobalScheduler::RemoveThread(const Thread* thread) { thread_list.erase(std::remove(thread_list.begin(), thread_list.end(), thread), thread_list.end()); } @@ -37,7 +39,7 @@ void GlobalScheduler::RemoveThread(Thread* thread) { * UnloadThread selects a core and forces it to unload its current thread's context */ void GlobalScheduler::UnloadThread(s32 core) { - Scheduler& sched = Core::System::GetInstance().Scheduler(core); + Scheduler& sched = system.Scheduler(core); sched.UnloadThread(); } @@ -52,7 +54,7 @@ void GlobalScheduler::UnloadThread(s32 core) { * thread in another core and swap it with its current thread. */ void GlobalScheduler::SelectThread(u32 core) { - auto update_thread = [](Thread* thread, Scheduler& sched) { + const auto update_thread = [](Thread* thread, Scheduler& sched) { if (thread != sched.selected_thread) { if (thread == nullptr) { ++sched.idle_selection_count; @@ -62,7 +64,7 @@ void GlobalScheduler::SelectThread(u32 core) { sched.context_switch_pending = sched.selected_thread != sched.current_thread; std::atomic_thread_fence(std::memory_order_seq_cst); }; - Scheduler& sched = Core::System::GetInstance().Scheduler(core); + Scheduler& sched = system.Scheduler(core); Thread* current_thread = nullptr; // Step 1: Get top thread in schedule queue. current_thread = scheduled_queue[core].empty() ? nullptr : scheduled_queue[core].front(); @@ -118,8 +120,8 @@ void GlobalScheduler::SelectThread(u32 core) { */ void GlobalScheduler::YieldThread(Thread* yielding_thread) { // Note: caller should use critical section, etc. - u32 core_id = static_cast(yielding_thread->GetProcessorID()); - u32 priority = yielding_thread->GetPriority(); + const u32 core_id = static_cast(yielding_thread->GetProcessorID()); + const u32 priority = yielding_thread->GetPriority(); // Yield the thread ASSERT_MSG(yielding_thread == scheduled_queue[core_id].front(priority), @@ -139,8 +141,8 @@ void GlobalScheduler::YieldThread(Thread* yielding_thread) { void GlobalScheduler::YieldThreadAndBalanceLoad(Thread* yielding_thread) { // Note: caller should check if !thread.IsSchedulerOperationRedundant and use critical section, // etc. 
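A quick aside on the queue primitive these yield functions lean on: the per-priority yield rotates the thread at the front of one priority level to the back of that same level and exposes the new front as the candidate winner. A minimal sketch of that behavior, with a plain std::deque standing in for the scheduler's real multilevel queue (all names here are illustrative, not the actual API):

#include <array>
#include <cstddef>
#include <deque>

struct Thread;

struct ToyReadyQueue {
    static constexpr std::size_t NUM_PRIORITIES = 64;
    std::array<std::deque<Thread*>, NUM_PRIORITIES> levels;

    // Rotate the front thread of 'priority' to the back of the same level and
    // return the new front, which is the potential winner after a yield.
    Thread* Yield(std::size_t priority) {
        auto& level = levels[priority];
        if (level.empty()) {
            return nullptr;
        }
        Thread* const front = level.front();
        level.pop_front();
        level.push_back(front);
        return level.front();
    }
};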
- u32 core_id = static_cast(yielding_thread->GetProcessorID()); - u32 priority = yielding_thread->GetPriority(); + const u32 core_id = static_cast(yielding_thread->GetProcessorID()); + const u32 priority = yielding_thread->GetPriority(); // Yield the thread ASSERT_MSG(yielding_thread == scheduled_queue[core_id].front(priority), @@ -155,12 +157,13 @@ void GlobalScheduler::YieldThreadAndBalanceLoad(Thread* yielding_thread) { Thread* next_thread = scheduled_queue[core_id].front(priority); Thread* winner = nullptr; for (auto& thread : suggested_queue[core_id]) { - s32 source_core = thread->GetProcessorID(); + const s32 source_core = thread->GetProcessorID(); if (source_core >= 0) { if (current_threads[source_core] != nullptr) { if (thread == current_threads[source_core] || - current_threads[source_core]->GetPriority() < min_regular_priority) + current_threads[source_core]->GetPriority() < min_regular_priority) { continue; + } } if (next_thread->GetLastRunningTicks() >= thread->GetLastRunningTicks() || next_thread->GetPriority() < thread->GetPriority()) { @@ -174,8 +177,9 @@ void GlobalScheduler::YieldThreadAndBalanceLoad(Thread* yielding_thread) { if (winner != nullptr) { if (winner != yielding_thread) { - if (winner->IsRunning()) + if (winner->IsRunning()) { UnloadThread(winner->GetProcessorID()); + } TransferToCore(winner->GetPriority(), core_id, winner); } } else { @@ -195,7 +199,7 @@ void GlobalScheduler::YieldThreadAndWaitForLoadBalancing(Thread* yielding_thread // Note: caller should check if !thread.IsSchedulerOperationRedundant and use critical section, // etc. Thread* winner = nullptr; - u32 core_id = static_cast(yielding_thread->GetProcessorID()); + const u32 core_id = static_cast(yielding_thread->GetProcessorID()); // Remove the thread from its scheduled mlq, put it on the corresponding "suggested" one instead TransferToCore(yielding_thread->GetPriority(), -1, yielding_thread); @@ -209,9 +213,10 @@ void GlobalScheduler::YieldThreadAndWaitForLoadBalancing(Thread* yielding_thread current_threads[i] = scheduled_queue[i].empty() ? 
nullptr : scheduled_queue[i].front(); } for (auto& thread : suggested_queue[core_id]) { - s32 source_core = thread->GetProcessorID(); - if (source_core < 0 || thread == current_threads[source_core]) + const s32 source_core = thread->GetProcessorID(); + if (source_core < 0 || thread == current_threads[source_core]) { continue; + } if (current_threads[source_core] == nullptr || current_threads[source_core]->GetPriority() >= min_regular_priority) { winner = thread; @@ -220,8 +225,9 @@ void GlobalScheduler::YieldThreadAndWaitForLoadBalancing(Thread* yielding_thread } if (winner != nullptr) { if (winner != yielding_thread) { - if (winner->IsRunning()) + if (winner->IsRunning()) { UnloadThread(winner->GetProcessorID()); + } TransferToCore(winner->GetPriority(), core_id, winner); } } else { @@ -232,6 +238,16 @@ void GlobalScheduler::YieldThreadAndWaitForLoadBalancing(Thread* yielding_thread AskForReselectionOrMarkRedundant(yielding_thread, winner); } +void GlobalScheduler::Schedule(u32 priority, u32 core, Thread* thread) { + ASSERT_MSG(thread->GetProcessorID() == core, "Thread must be assigned to this core."); + scheduled_queue[core].add(thread, priority); +} + +void GlobalScheduler::SchedulePrepend(u32 priority, u32 core, Thread* thread) { + ASSERT_MSG(thread->GetProcessorID() == core, "Thread must be assigned to this core."); + scheduled_queue[core].add(thread, priority, false); +} + void GlobalScheduler::AskForReselectionOrMarkRedundant(Thread* current_thread, Thread* winner) { if (current_thread == winner) { // TODO(blinkhawk): manage redundant operations, this is not implemented. @@ -244,13 +260,13 @@ void GlobalScheduler::AskForReselectionOrMarkRedundant(Thread* current_thread, T GlobalScheduler::~GlobalScheduler() = default; -Scheduler::Scheduler(Core::System& system, Core::ARM_Interface& cpu_core, u32 id) - : system(system), cpu_core(cpu_core), id(id) {} +Scheduler::Scheduler(Core::System& system, Core::ARM_Interface& cpu_core, u32 core_id) + : system(system), cpu_core(cpu_core), core_id(core_id) {} -Scheduler::~Scheduler() {} +Scheduler::~Scheduler() = default; bool Scheduler::HaveReadyThreads() const { - return system.GlobalScheduler().HaveReadyThreads(id); + return system.GlobalScheduler().HaveReadyThreads(core_id); } Thread* Scheduler::GetCurrentThread() const { @@ -262,7 +278,7 @@ Thread* Scheduler::GetSelectedThread() const { } void Scheduler::SelectThreads() { - system.GlobalScheduler().SelectThread(id); + system.GlobalScheduler().SelectThread(core_id); } u64 Scheduler::GetLastContextSwitchTicks() const { @@ -270,13 +286,14 @@ u64 Scheduler::GetLastContextSwitchTicks() const { } void Scheduler::TryDoContextSwitch() { - if (context_switch_pending) + if (context_switch_pending) { SwitchContext(); + } } void Scheduler::UnloadThread() { Thread* const previous_thread = GetCurrentThread(); - Process* const previous_process = Core::CurrentProcess(); + Process* const previous_process = system.Kernel().CurrentProcess(); UpdateLastContextSwitchTime(previous_thread, previous_process); @@ -301,10 +318,11 @@ void Scheduler::SwitchContext() { Thread* const new_thread = GetSelectedThread(); context_switch_pending = false; - if (new_thread == previous_thread) + if (new_thread == previous_thread) { return; + } - Process* const previous_process = Core::CurrentProcess(); + Process* const previous_process = system.Kernel().CurrentProcess(); UpdateLastContextSwitchTime(previous_thread, previous_process); @@ -324,7 +342,7 @@ void Scheduler::SwitchContext() { // Load context of new thread if (new_thread) 
{ - ASSERT_MSG(new_thread->GetProcessorID() == this->id, + ASSERT_MSG(new_thread->GetProcessorID() == this->core_id, "Thread must be assigned to this core."); ASSERT_MSG(new_thread->GetStatus() == ThreadStatus::Ready, "Thread must be ready to become running."); @@ -353,7 +371,7 @@ void Scheduler::SwitchContext() { void Scheduler::UpdateLastContextSwitchTime(Thread* thread, Process* process) { const u64 prev_switch_ticks = last_context_switch_time; - const u64 most_recent_switch_ticks = Core::System::GetInstance().CoreTiming().GetTicks(); + const u64 most_recent_switch_ticks = system.CoreTiming().GetTicks(); const u64 update_ticks = most_recent_switch_ticks - prev_switch_ticks; if (thread != nullptr) { diff --git a/src/core/hle/kernel/scheduler.h b/src/core/hle/kernel/scheduler.h index 82ed64b55..1c9d8a30f 100644 --- a/src/core/hle/kernel/scheduler.h +++ b/src/core/hle/kernel/scheduler.h @@ -24,62 +24,70 @@ class GlobalScheduler final { public: static constexpr u32 NUM_CPU_CORES = 4; - GlobalScheduler() { - reselection_pending = false; - } + explicit GlobalScheduler(Core::System& system); ~GlobalScheduler(); /// Adds a new thread to the scheduler void AddThread(SharedPtr thread); /// Removes a thread from the scheduler - void RemoveThread(Thread* thread); + void RemoveThread(const Thread* thread); /// Returns a list of all threads managed by the scheduler const std::vector>& GetThreadList() const { return thread_list; } + // Add a thread to the suggested queue of a cpu core. Suggested threads may be + // picked if no thread is scheduled to run on the core. void Suggest(u32 priority, u32 core, Thread* thread) { suggested_queue[core].add(thread, priority); } + // Remove a thread to the suggested queue of a cpu core. Suggested threads may be + // picked if no thread is scheduled to run on the core. void Unsuggest(u32 priority, u32 core, Thread* thread) { suggested_queue[core].remove(thread, priority); } - void Schedule(u32 priority, u32 core, Thread* thread) { - ASSERT_MSG(thread->GetProcessorID() == core, "Thread must be assigned to this core."); - scheduled_queue[core].add(thread, priority); - } + // Add a thread to the scheduling queue of a cpu core. The thread is added at the + // back the queue in its priority level + void Schedule(u32 priority, u32 core, Thread* thread); - void SchedulePrepend(u32 priority, u32 core, Thread* thread) { - ASSERT_MSG(thread->GetProcessorID() == core, "Thread must be assigned to this core."); - scheduled_queue[core].add(thread, priority, false); - } + // Add a thread to the scheduling queue of a cpu core. The thread is added at the + // front the queue in its priority level + void SchedulePrepend(u32 priority, u32 core, Thread* thread); + // Reschedule an already scheduled thread based on a new priority void Reschedule(u32 priority, u32 core, Thread* thread) { scheduled_queue[core].remove(thread, priority); scheduled_queue[core].add(thread, priority); } + // Unschedule a thread. void Unschedule(u32 priority, u32 core, Thread* thread) { scheduled_queue[core].remove(thread, priority); } + // Transfers a thread into an specific core. If the destination_core is -1 + // it will be unscheduled from its source code and added into its suggested + // queue. 
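To make the cases described above concrete, an illustrative usage sketch follows; gs (a GlobalScheduler reference) and t (a schedulable Thread pointer) are hypothetical names, not code from the patch:

// gs.TransferToCore(t->GetPriority(), 2, t);
//     migrate: unschedule t from its current core, schedule it on core 2,
//     and leave it behind in the old core's suggested queue
// gs.TransferToCore(t->GetPriority(), -1, t);
//     park: unschedule t from its current core and keep it only as a
//     suggestion on that core
// gs.TransferToCore(t->GetPriority(), t->GetProcessorID(), t);
//     no-op: source and destination cores match, so nothing changes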
void TransferToCore(u32 priority, s32 destination_core, Thread* thread) { - bool schedulable = thread->GetPriority() < THREADPRIO_COUNT; - s32 source_core = thread->GetProcessorID(); - if (source_core == destination_core || !schedulable) + const bool schedulable = thread->GetPriority() < THREADPRIO_COUNT; + const s32 source_core = thread->GetProcessorID(); + if (source_core == destination_core || !schedulable) { return; + } thread->SetProcessorID(destination_core); - if (source_core >= 0) + if (source_core >= 0) { Unschedule(priority, source_core, thread); + } if (destination_core >= 0) { Unsuggest(priority, destination_core, thread); Schedule(priority, destination_core, thread); } - if (source_core >= 0) + if (source_core >= 0) { Suggest(priority, source_core, thread); + } } /* @@ -99,7 +107,7 @@ public: */ void SelectThread(u32 core); - bool HaveReadyThreads(u32 core_id) { + bool HaveReadyThreads(u32 core_id) const { return !scheduled_queue[core_id].empty(); } @@ -133,8 +141,8 @@ public: reselection_pending.store(true, std::memory_order_release); } - bool IsReselectionPending() { - return reselection_pending.load(std::memory_order_acquire); + bool IsReselectionPending() const { + return reselection_pending.load(); } private: @@ -147,11 +155,12 @@ private: /// Lists all thread ids that aren't deleted/etc. std::vector> thread_list; + Core::System& system; }; class Scheduler final { public: - explicit Scheduler(Core::System& system, Core::ARM_Interface& cpu_core, const u32 id); + explicit Scheduler(Core::System& system, Core::ARM_Interface& cpu_core, const u32 core_id); ~Scheduler(); /// Returns whether there are any threads that are ready to run. @@ -204,7 +213,7 @@ private: Core::ARM_Interface& cpu_core; u64 last_context_switch_time = 0; u64 idle_selection_count = 0; - const u32 id; + const u32 core_id; bool context_switch_pending = false; }; diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp index 560ac3945..d520ed033 100644 --- a/src/core/hle/kernel/svc.cpp +++ b/src/core/hle/kernel/svc.cpp @@ -1560,13 +1560,13 @@ static void SleepThread(Core::System& system, s64 nanoseconds) { if (nanoseconds <= 0) { switch (static_cast(nanoseconds)) { case SleepType::YieldWithoutLoadBalancing: - current_thread->YieldType0(); + current_thread->YieldSimple(); break; case SleepType::YieldWithLoadBalancing: - current_thread->YieldType1(); + current_thread->YieldAndBalanceLoad(); break; case SleepType::YieldAndWaitForLoadBalancing: - current_thread->YieldType2(); + current_thread->YieldAndWaitForLoadBalancing(); break; default: UNREACHABLE_MSG("Unimplemented sleep yield type '{:016X}'!", nanoseconds); @@ -1638,8 +1638,9 @@ static ResultCode SignalProcessWideKey(Core::System& system, VAddr condition_var const auto& thread_list = scheduler.GetThreadList(); for (const auto& thread : thread_list) { - if (thread->GetCondVarWaitAddress() == condition_variable_addr) + if (thread->GetCondVarWaitAddress() == condition_variable_addr) { waiting_threads.push_back(thread); + } } // Sort them by priority, such that the highest priority ones come first. 
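The sort referenced by that comment falls outside the visible hunk; in spirit it is an ascending sort on the numeric priority value, since lower values denote more urgent threads. A hedged sketch of such a comparator (illustrative, not the verbatim patch code):

std::sort(waiting_threads.begin(), waiting_threads.end(),
          [](const SharedPtr<Thread>& lhs, const SharedPtr<Thread>& rhs) {
              return lhs->GetPriority() < rhs->GetPriority();
          });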
@@ -1747,9 +1748,11 @@ static ResultCode WaitForAddress(Core::System& system, VAddr address, u32 type, const auto arbitration_type = static_cast(type); auto& address_arbiter = system.Kernel().CurrentProcess()->GetAddressArbiter(); - ResultCode result = address_arbiter.WaitForAddress(address, arbitration_type, value, timeout); - if (result == RESULT_SUCCESS) + const ResultCode result = + address_arbiter.WaitForAddress(address, arbitration_type, value, timeout); + if (result == RESULT_SUCCESS) { system.PrepareReschedule(); + } return result; } diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp index d0fa7b370..8cf0a7ec7 100644 --- a/src/core/hle/kernel/thread.cpp +++ b/src/core/hle/kernel/thread.cpp @@ -373,43 +373,44 @@ void Thread::Sleep(s64 nanoseconds) { WakeAfterDelay(nanoseconds); } -void Thread::YieldType0() { +void Thread::YieldSimple() { auto& scheduler = kernel.GlobalScheduler(); scheduler.YieldThread(this); } -void Thread::YieldType1() { +void Thread::YieldAndBalanceLoad() { auto& scheduler = kernel.GlobalScheduler(); scheduler.YieldThreadAndBalanceLoad(this); } -void Thread::YieldType2() { +void Thread::YieldAndWaitForLoadBalancing() { auto& scheduler = kernel.GlobalScheduler(); scheduler.YieldThreadAndWaitForLoadBalancing(this); } void Thread::SetSchedulingStatus(ThreadSchedStatus new_status) { - u32 old_flags = scheduling_state; + const u32 old_flags = scheduling_state; scheduling_state = (scheduling_state & ThreadSchedMasks::HighMask) | static_cast(new_status); AdjustSchedulingOnStatus(old_flags); } void Thread::SetCurrentPriority(u32 new_priority) { - u32 old_priority = current_priority; - current_priority = new_priority; + u32 old_priority = std::exchange(current_priority, new_priority); AdjustSchedulingOnPriority(old_priority); } ResultCode Thread::SetCoreAndAffinityMask(s32 new_core, u64 new_affinity_mask) { - auto HighestSetCore = [](u64 mask, u32 max_cores) { + const auto HighestSetCore = [](u64 mask, u32 max_cores) { for (s32 core = max_cores - 1; core >= 0; core--) { - if (((mask >> core) & 1) != 0) + if (((mask >> core) & 1) != 0) { return core; + } } return -1; }; - bool use_override = affinity_override_count != 0; + + const bool use_override = affinity_override_count != 0; // The value -3 is "do not change the ideal core". if (new_core == -3) { new_core = use_override ? 
ideal_core_override : ideal_core; @@ -421,11 +422,10 @@ ResultCode Thread::SetCoreAndAffinityMask(s32 new_core, u64 new_affinity_mask) { ideal_core_override = new_core; affinity_mask_override = new_affinity_mask; } else { - u64 old_affinity_mask = affinity_mask; + const u64 old_affinity_mask = std::exchange(affinity_mask, new_affinity_mask); ideal_core = new_core; - affinity_mask = new_affinity_mask; if (old_affinity_mask != new_affinity_mask) { - s32 old_core = processor_id; + const s32 old_core = processor_id; if (processor_id >= 0 && ((affinity_mask >> processor_id) & 1) == 0) { if (ideal_core < 0) { processor_id = HighestSetCore(affinity_mask, GlobalScheduler::NUM_CPU_CORES); @@ -440,28 +440,33 @@ ResultCode Thread::SetCoreAndAffinityMask(s32 new_core, u64 new_affinity_mask) { } void Thread::AdjustSchedulingOnStatus(u32 old_flags) { - if (old_flags == scheduling_state) + if (old_flags == scheduling_state) { return; + } auto& scheduler = kernel.GlobalScheduler(); if (static_cast(old_flags & ThreadSchedMasks::LowMask) == ThreadSchedStatus::Runnable) { // In this case the thread was running, now it's pausing/exitting - if (processor_id >= 0) + if (processor_id >= 0) { scheduler.Unschedule(current_priority, processor_id, this); + } - for (s32 core = 0; core < GlobalScheduler::NUM_CPU_CORES; core++) { - if (core != processor_id && ((affinity_mask >> core) & 1) != 0) + for (u32 core = 0; core < GlobalScheduler::NUM_CPU_CORES; core++) { + if (core != processor_id && ((affinity_mask >> core) & 1) != 0) { scheduler.Unsuggest(current_priority, core, this); + } } } else if (GetSchedulingStatus() == ThreadSchedStatus::Runnable) { // The thread is now set to running from being stopped - if (processor_id >= 0) + if (processor_id >= 0) { scheduler.Schedule(current_priority, processor_id, this); + } - for (s32 core = 0; core < GlobalScheduler::NUM_CPU_CORES; core++) { - if (core != processor_id && ((affinity_mask >> core) & 1) != 0) + for (u32 core = 0; core < GlobalScheduler::NUM_CPU_CORES; core++) { + if (core != processor_id && ((affinity_mask >> core) & 1) != 0) { scheduler.Suggest(current_priority, core, this); + } } } @@ -477,7 +482,7 @@ void Thread::AdjustSchedulingOnPriority(u32 old_priority) { scheduler.Unschedule(old_priority, processor_id, this); } - for (s32 core = 0; core < GlobalScheduler::NUM_CPU_CORES; core++) { + for (u32 core = 0; core < GlobalScheduler::NUM_CPU_CORES; core++) { if (core != processor_id && ((affinity_mask >> core) & 1) != 0) { scheduler.Unsuggest(old_priority, core, this); } @@ -494,7 +499,7 @@ void Thread::AdjustSchedulingOnPriority(u32 old_priority) { } } - for (s32 core = 0; core < GlobalScheduler::NUM_CPU_CORES; core++) { + for (u32 core = 0; core < GlobalScheduler::NUM_CPU_CORES; core++) { if (core != processor_id && ((affinity_mask >> core) & 1) != 0) { scheduler.Suggest(current_priority, core, this); } @@ -506,10 +511,11 @@ void Thread::AdjustSchedulingOnPriority(u32 old_priority) { void Thread::AdjustSchedulingOnAffinity(u64 old_affinity_mask, s32 old_core) { auto& scheduler = Core::System::GetInstance().GlobalScheduler(); if (GetSchedulingStatus() != ThreadSchedStatus::Runnable || - current_priority >= THREADPRIO_COUNT) + current_priority >= THREADPRIO_COUNT) { return; + } - for (s32 core = 0; core < GlobalScheduler::NUM_CPU_CORES; core++) { + for (u32 core = 0; core < GlobalScheduler::NUM_CPU_CORES; core++) { if (((old_affinity_mask >> core) & 1) != 0) { if (core == old_core) { scheduler.Unschedule(current_priority, core, this); @@ -519,7 +525,7 @@ void 
Thread::AdjustSchedulingOnAffinity(u64 old_affinity_mask, s32 old_core) { } } - for (s32 core = 0; core < GlobalScheduler::NUM_CPU_CORES; core++) { + for (u32 core = 0; core < GlobalScheduler::NUM_CPU_CORES; core++) { if (((affinity_mask >> core) & 1) != 0) { if (core == processor_id) { scheduler.Schedule(current_priority, core, this); diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h index c426a7209..bf0cae959 100644 --- a/src/core/hle/kernel/thread.h +++ b/src/core/hle/kernel/thread.h @@ -75,7 +75,12 @@ enum class ThreadActivity : u32 { Paused = 1, }; -enum class ThreadSchedStatus : u32 { None = 0, Paused = 1, Runnable = 2, Exited = 3 }; +enum class ThreadSchedStatus : u32 { + None = 0, + Paused = 1, + Runnable = 2, + Exited = 3, +}; enum ThreadSchedFlags : u32 { ProcessPauseFlag = 1 << 4, @@ -403,15 +408,15 @@ public: void Sleep(s64 nanoseconds); /// Yields this thread without rebalancing loads. - void YieldType0(); + void YieldSimple(); /// Yields this thread and does a load rebalancing. - void YieldType1(); + void YieldAndBalanceLoad(); /// Yields this thread and if the core is left idle, loads are rebalanced - void YieldType2(); + void YieldAndWaitForLoadBalancing(); - ThreadSchedStatus GetSchedulingStatus() { + ThreadSchedStatus GetSchedulingStatus() const { return static_cast(scheduling_state & ThreadSchedMasks::LowMask); } From 103f3a2fe51a09caf3f478226b6957b23c6eff79 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 10 Sep 2019 10:23:43 -0400 Subject: [PATCH 11/29] Scheduler: Add protections for Yield bombing In case of redundant yields, the scheduler will now idle the core for it's timeslice, in order to avoid continuously yielding the same thing over and over. --- src/core/hle/kernel/scheduler.cpp | 16 +++++++++------- src/core/hle/kernel/scheduler.h | 8 ++++---- src/core/hle/kernel/svc.cpp | 13 +++++++++---- src/core/hle/kernel/thread.cpp | 12 ++++++------ src/core/hle/kernel/thread.h | 6 +++--- 5 files changed, 31 insertions(+), 24 deletions(-) diff --git a/src/core/hle/kernel/scheduler.cpp b/src/core/hle/kernel/scheduler.cpp index df4e9b799..451fd8077 100644 --- a/src/core/hle/kernel/scheduler.cpp +++ b/src/core/hle/kernel/scheduler.cpp @@ -118,7 +118,7 @@ void GlobalScheduler::SelectThread(u32 core) { * YieldThread takes a thread and moves it to the back of the it's priority list * This operation can be redundant and no scheduling is changed if marked as so. */ -void GlobalScheduler::YieldThread(Thread* yielding_thread) { +bool GlobalScheduler::YieldThread(Thread* yielding_thread) { // Note: caller should use critical section, etc. const u32 core_id = static_cast(yielding_thread->GetProcessorID()); const u32 priority = yielding_thread->GetPriority(); @@ -129,7 +129,7 @@ void GlobalScheduler::YieldThread(Thread* yielding_thread) { scheduled_queue[core_id].yield(priority); Thread* winner = scheduled_queue[core_id].front(priority); - AskForReselectionOrMarkRedundant(yielding_thread, winner); + return AskForReselectionOrMarkRedundant(yielding_thread, winner); } /* @@ -138,7 +138,7 @@ void GlobalScheduler::YieldThread(Thread* yielding_thread) { * a better priority than the next thread in the core. * This operation can be redundant and no scheduling is changed if marked as so. */ -void GlobalScheduler::YieldThreadAndBalanceLoad(Thread* yielding_thread) { +bool GlobalScheduler::YieldThreadAndBalanceLoad(Thread* yielding_thread) { // Note: caller should check if !thread.IsSchedulerOperationRedundant and use critical section, // etc. 
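// (Annotation, not part of the original diff: the bool these yield functions
// now return encodes a single contract. It is true when the yield was
// redundant, meaning the yielding thread is still the best candidate and the
// caller may simply idle the core, and false when a genuine reselection was
// requested via reselection_pending.)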
const u32 core_id = static_cast(yielding_thread->GetProcessorID()); @@ -186,7 +186,7 @@ void GlobalScheduler::YieldThreadAndBalanceLoad(Thread* yielding_thread) { winner = next_thread; } - AskForReselectionOrMarkRedundant(yielding_thread, winner); + return AskForReselectionOrMarkRedundant(yielding_thread, winner); } /* @@ -195,7 +195,7 @@ void GlobalScheduler::YieldThreadAndBalanceLoad(Thread* yielding_thread) { * a suggested thread is obtained instead. * This operation can be redundant and no scheduling is changed if marked as so. */ -void GlobalScheduler::YieldThreadAndWaitForLoadBalancing(Thread* yielding_thread) { +bool GlobalScheduler::YieldThreadAndWaitForLoadBalancing(Thread* yielding_thread) { // Note: caller should check if !thread.IsSchedulerOperationRedundant and use critical section, // etc. Thread* winner = nullptr; @@ -235,7 +235,7 @@ void GlobalScheduler::YieldThreadAndWaitForLoadBalancing(Thread* yielding_thread } } - AskForReselectionOrMarkRedundant(yielding_thread, winner); + return AskForReselectionOrMarkRedundant(yielding_thread, winner); } void GlobalScheduler::Schedule(u32 priority, u32 core, Thread* thread) { @@ -248,13 +248,15 @@ void GlobalScheduler::SchedulePrepend(u32 priority, u32 core, Thread* thread) { scheduled_queue[core].add(thread, priority, false); } -void GlobalScheduler::AskForReselectionOrMarkRedundant(Thread* current_thread, Thread* winner) { +bool GlobalScheduler::AskForReselectionOrMarkRedundant(Thread* current_thread, Thread* winner) { if (current_thread == winner) { // TODO(blinkhawk): manage redundant operations, this is not implemented. // as its mostly an optimization. // current_thread->SetRedundantSchedulerOperation(); + return true; } else { reselection_pending.store(true, std::memory_order_release); + return false; } } diff --git a/src/core/hle/kernel/scheduler.h b/src/core/hle/kernel/scheduler.h index 1c9d8a30f..8fcc86bae 100644 --- a/src/core/hle/kernel/scheduler.h +++ b/src/core/hle/kernel/scheduler.h @@ -115,7 +115,7 @@ public: * YieldThread takes a thread and moves it to the back of the it's priority list * This operation can be redundant and no scheduling is changed if marked as so. */ - void YieldThread(Thread* thread); + bool YieldThread(Thread* thread); /* * YieldThreadAndBalanceLoad takes a thread and moves it to the back of the it's priority list. @@ -123,7 +123,7 @@ public: * a better priority than the next thread in the core. * This operation can be redundant and no scheduling is changed if marked as so. */ - void YieldThreadAndBalanceLoad(Thread* thread); + bool YieldThreadAndBalanceLoad(Thread* thread); /* * YieldThreadAndWaitForLoadBalancing takes a thread and moves it out of the scheduling queue @@ -131,7 +131,7 @@ public: * a suggested thread is obtained instead. * This operation can be redundant and no scheduling is changed if marked as so. 
*/ - void YieldThreadAndWaitForLoadBalancing(Thread* thread); + bool YieldThreadAndWaitForLoadBalancing(Thread* thread); u32 CpuCoresCount() const { return NUM_CPU_CORES; } @@ -146,7 +146,7 @@ public: } private: - void AskForReselectionOrMarkRedundant(Thread* current_thread, Thread* winner); + bool AskForReselectionOrMarkRedundant(Thread* current_thread, Thread* winner); static constexpr u32 min_regular_priority = 2; std::array, NUM_CPU_CORES> scheduled_queue; diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp index d520ed033..bd67fc96d 100644 --- a/src/core/hle/kernel/svc.cpp +++ b/src/core/hle/kernel/svc.cpp @@ -1556,17 +1556,18 @@ static void SleepThread(Core::System& system, s64 nanoseconds) { auto& scheduler = system.CurrentScheduler(); auto* const current_thread = scheduler.GetCurrentThread(); + bool redundant = false; if (nanoseconds <= 0) { switch (static_cast<SleepType>(nanoseconds)) { case SleepType::YieldWithoutLoadBalancing: - current_thread->YieldSimple(); + redundant = current_thread->YieldSimple(); break; case SleepType::YieldWithLoadBalancing: - current_thread->YieldAndBalanceLoad(); + redundant = current_thread->YieldAndBalanceLoad(); break; case SleepType::YieldAndWaitForLoadBalancing: - current_thread->YieldAndWaitForLoadBalancing(); + redundant = current_thread->YieldAndWaitForLoadBalancing(); break; default: UNREACHABLE_MSG("Unimplemented sleep yield type '{:016X}'!", nanoseconds); @@ -1575,7 +1576,11 @@ static void SleepThread(Core::System& system, s64 nanoseconds) { current_thread->Sleep(nanoseconds); } - system.PrepareReschedule(current_thread->GetProcessorID()); + if (redundant) { + system.CoreTiming().Idle(); + } else { + system.PrepareReschedule(current_thread->GetProcessorID()); + } } /// Wait process wide key atomic diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp index 8cf0a7ec7..ae62609e3 100644 --- a/src/core/hle/kernel/thread.cpp +++ b/src/core/hle/kernel/thread.cpp @@ -373,19 +373,19 @@ void Thread::Sleep(s64 nanoseconds) { WakeAfterDelay(nanoseconds); } -void Thread::YieldSimple() { +bool Thread::YieldSimple() { auto& scheduler = kernel.GlobalScheduler(); - scheduler.YieldThread(this); + return scheduler.YieldThread(this); } -void Thread::YieldAndBalanceLoad() { +bool Thread::YieldAndBalanceLoad() { auto& scheduler = kernel.GlobalScheduler(); - scheduler.YieldThreadAndBalanceLoad(this); + return scheduler.YieldThreadAndBalanceLoad(this); } -void Thread::YieldAndWaitForLoadBalancing() { +bool Thread::YieldAndWaitForLoadBalancing() { auto& scheduler = kernel.GlobalScheduler(); - scheduler.YieldThreadAndWaitForLoadBalancing(this); + return scheduler.YieldThreadAndWaitForLoadBalancing(this); } void Thread::SetSchedulingStatus(ThreadSchedStatus new_status) { diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h index bf0cae959..88255099f 100644 --- a/src/core/hle/kernel/thread.h +++ b/src/core/hle/kernel/thread.h @@ -408,13 +408,13 @@ public: void Sleep(s64 nanoseconds); /// Yields this thread without rebalancing loads. - void YieldSimple(); + bool YieldSimple(); /// Yields this thread and does a load rebalancing.
- void YieldAndBalanceLoad(); + bool YieldAndBalanceLoad(); /// Yields this thread and if the core is left idle, loads are rebalanced - void YieldAndWaitForLoadBalancing(); + bool YieldAndWaitForLoadBalancing(); ThreadSchedStatus GetSchedulingStatus() const { return static_cast(scheduling_state & ThreadSchedMasks::LowMask); From b49c0dab8772afb06358e5d19af092226b3a59bb Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 10 Sep 2019 11:04:40 -0400 Subject: [PATCH 12/29] Kernel: Initial implementation of thread preemption. --- src/core/hle/kernel/kernel.cpp | 16 ++++++++++++++++ src/core/hle/kernel/scheduler.cpp | 10 ++++++++++ src/core/hle/kernel/scheduler.h | 4 ++++ 3 files changed, 30 insertions(+) diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp index 600d6ec74..7a913520d 100644 --- a/src/core/hle/kernel/kernel.cpp +++ b/src/core/hle/kernel/kernel.cpp @@ -12,6 +12,7 @@ #include "core/core.h" #include "core/core_timing.h" +#include "core/core_timing_util.h" #include "core/hle/kernel/address_arbiter.h" #include "core/hle/kernel/client_port.h" #include "core/hle/kernel/handle_table.h" @@ -96,6 +97,7 @@ struct KernelCore::Impl { InitializeSystemResourceLimit(kernel); InitializeThreads(); + InitializePreemption(); } void Shutdown() { @@ -111,6 +113,7 @@ struct KernelCore::Impl { thread_wakeup_callback_handle_table.Clear(); thread_wakeup_event_type = nullptr; + preemption_event = nullptr; named_ports.clear(); } @@ -133,6 +136,18 @@ struct KernelCore::Impl { system.CoreTiming().RegisterEvent("ThreadWakeupCallback", ThreadWakeupCallback); } + void InitializePreemption() { + preemption_event = system.CoreTiming().RegisterEvent( + "PreemptionCallback", [this](u64 userdata, s64 cycles_late) { + global_scheduler.PreemptThreads(); + s64 time_interval = Core::Timing::msToCycles(std::chrono::milliseconds(10)); + system.CoreTiming().ScheduleEvent(time_interval, preemption_event); + }); + + s64 time_interval = Core::Timing::msToCycles(std::chrono::milliseconds(10)); + system.CoreTiming().ScheduleEvent(time_interval, preemption_event); + } + std::atomic next_object_id{0}; std::atomic next_kernel_process_id{Process::InitialKIPIDMin}; std::atomic next_user_process_id{Process::ProcessIDMin}; @@ -146,6 +161,7 @@ struct KernelCore::Impl { SharedPtr system_resource_limit; Core::Timing::EventType* thread_wakeup_event_type = nullptr; + Core::Timing::EventType* preemption_event = nullptr; // TODO(yuriks): This can be removed if Thread objects are explicitly pooled in the future, // allowing us to simply use a pool index or similar. 
Kernel::HandleTable thread_wakeup_callback_handle_table; diff --git a/src/core/hle/kernel/scheduler.cpp b/src/core/hle/kernel/scheduler.cpp index 451fd8077..0d45307cd 100644 --- a/src/core/hle/kernel/scheduler.cpp +++ b/src/core/hle/kernel/scheduler.cpp @@ -238,6 +238,16 @@ bool GlobalScheduler::YieldThreadAndWaitForLoadBalancing(Thread* yielding_thread return AskForReselectionOrMarkRedundant(yielding_thread, winner); } +void GlobalScheduler::PreemptThreads() { + for (std::size_t core_id = 0; core_id < NUM_CPU_CORES; core_id++) { + const u64 priority = preemption_priorities[core_id]; + if (scheduled_queue[core_id].size(priority) > 1) { + scheduled_queue[core_id].yield(priority); + reselection_pending.store(true, std::memory_order_release); + } + } +} + void GlobalScheduler::Schedule(u32 priority, u32 core, Thread* thread) { ASSERT_MSG(thread->GetProcessorID() == core, "Thread must be assigned to this core."); scheduled_queue[core].add(thread, priority); diff --git a/src/core/hle/kernel/scheduler.h b/src/core/hle/kernel/scheduler.h index 8fcc86bae..c13a368fd 100644 --- a/src/core/hle/kernel/scheduler.h +++ b/src/core/hle/kernel/scheduler.h @@ -133,6 +133,8 @@ public: */ bool YieldThreadAndWaitForLoadBalancing(Thread* thread); + void PreemptThreads(); + u32 CpuCoresCount() const { return NUM_CPU_CORES; } @@ -153,6 +155,8 @@ private: std::array, NUM_CPU_CORES> suggested_queue; std::atomic reselection_pending; + std::array preemption_priorities = {59, 59, 59, 62}; + /// Lists all thread ids that aren't deleted/etc. std::vector> thread_list; Core::System& system; From 2d382de6fa79123fae7842246588651ee99b15e2 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 10 Sep 2019 15:26:24 -0400 Subject: [PATCH 13/29] Scheduler: Corrections to YieldAndBalanceLoad and Yield bombing protection. 
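A note on the preemption timer introduced in [PATCH 12/29] above: InitializePreemption() registers a core-timing event whose callback re-arms itself, so GlobalScheduler::PreemptThreads() runs on a fixed 10 ms cadence for as long as the kernel is up. Below is a minimal, self-contained sketch of that self-rescheduling pattern; TimerQueue, Event and ArmPreemption are hypothetical stand-ins for Core::Timing, not yuzu code.

    #include <cstdint>
    #include <functional>
    #include <queue>
    #include <vector>

    // Hypothetical stand-in for a core-timing service: callbacks fire when
    // virtual time reaches their deadline.
    struct Event {
        std::int64_t when;
        std::function<void()> fn;
        bool operator>(const Event& other) const { return when > other.when; }
    };

    class TimerQueue {
    public:
        void ScheduleEvent(std::int64_t delay, std::function<void()> fn) {
            events.push(Event{now + delay, std::move(fn)});
        }
        // Advance virtual time, firing due events in deadline order.
        void AdvanceTo(std::int64_t target) {
            while (!events.empty() && events.top().when <= target) {
                Event event = events.top();
                events.pop();
                now = event.when;
                event.fn();
            }
            now = target;
        }
    private:
        std::int64_t now = 0;
        std::priority_queue<Event, std::vector<Event>, std::greater<Event>> events;
    };

    // Self-rescheduling tick, mirroring InitializePreemption(): do the work,
    // then re-arm the same callback one interval in the future.
    void ArmPreemption(TimerQueue& timing, std::int64_t interval_ticks) {
        timing.ScheduleEvent(interval_ticks, [&timing, interval_ticks] {
            // global_scheduler.PreemptThreads() would run here.
            ArmPreemption(timing, interval_ticks);
        });
    }

Armed once at initialization, the callback then fires on every interval boundary as guest time advances, which is the behaviour the patch sets up with msToCycles(std::chrono::milliseconds(10)).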
--- src/core/hle/kernel/scheduler.cpp | 14 +++++++------- src/core/hle/kernel/scheduler.h | 2 +- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/core/hle/kernel/scheduler.cpp b/src/core/hle/kernel/scheduler.cpp index 0d45307cd..78463cef5 100644 --- a/src/core/hle/kernel/scheduler.cpp +++ b/src/core/hle/kernel/scheduler.cpp @@ -165,12 +165,12 @@ bool GlobalScheduler::YieldThreadAndBalanceLoad(Thread* yielding_thread) { continue; } } - if (next_thread->GetLastRunningTicks() >= thread->GetLastRunningTicks() || - next_thread->GetPriority() < thread->GetPriority()) { - if (thread->GetPriority() <= priority) { - winner = thread; - break; - } + } + if (next_thread->GetLastRunningTicks() >= thread->GetLastRunningTicks() || + next_thread->GetPriority() < thread->GetPriority()) { + if (thread->GetPriority() <= priority) { + winner = thread; + break; } } } @@ -240,7 +240,7 @@ bool GlobalScheduler::YieldThreadAndWaitForLoadBalancing(Thread* yielding_thread void GlobalScheduler::PreemptThreads() { for (std::size_t core_id = 0; core_id < NUM_CPU_CORES; core_id++) { - const u64 priority = preemption_priorities[core_id]; + const u32 priority = preemption_priorities[core_id]; if (scheduled_queue[core_id].size(priority) > 1) { scheduled_queue[core_id].yield(priority); reselection_pending.store(true, std::memory_order_release); diff --git a/src/core/hle/kernel/scheduler.h b/src/core/hle/kernel/scheduler.h index c13a368fd..408e20c88 100644 --- a/src/core/hle/kernel/scheduler.h +++ b/src/core/hle/kernel/scheduler.h @@ -155,7 +155,7 @@ private: std::array, NUM_CPU_CORES> suggested_queue; std::atomic reselection_pending; - std::array preemption_priorities = {59, 59, 59, 62}; + std::array preemption_priorities = {59, 59, 59, 62}; /// Lists all thread ids that aren't deleted/etc. std::vector> thread_list; From 0cf26cee593c3c6abe909f3db52d972f846b13a9 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Wed, 11 Sep 2019 12:14:37 -0400 Subject: [PATCH 14/29] Scheduler: Implement Yield Count and Core migration on Thread Preemption. --- src/core/hle/kernel/scheduler.cpp | 81 +++++++++++++++++++++++++++++-- src/core/hle/kernel/thread.h | 9 ++++ 2 files changed, 85 insertions(+), 5 deletions(-) diff --git a/src/core/hle/kernel/scheduler.cpp b/src/core/hle/kernel/scheduler.cpp index 78463cef5..5581c43bf 100644 --- a/src/core/hle/kernel/scheduler.cpp +++ b/src/core/hle/kernel/scheduler.cpp @@ -241,10 +241,83 @@ bool GlobalScheduler::YieldThreadAndWaitForLoadBalancing(Thread* yielding_thread void GlobalScheduler::PreemptThreads() { for (std::size_t core_id = 0; core_id < NUM_CPU_CORES; core_id++) { const u32 priority = preemption_priorities[core_id]; - if (scheduled_queue[core_id].size(priority) > 1) { + + if (scheduled_queue[core_id].size(priority) > 0) { + scheduled_queue[core_id].front(priority)->IncrementYieldCount(); scheduled_queue[core_id].yield(priority); - reselection_pending.store(true, std::memory_order_release); + if (scheduled_queue[core_id].size(priority) > 1) { + scheduled_queue[core_id].front(priority)->IncrementYieldCount(); + } } + + Thread* current_thread = + scheduled_queue[core_id].empty() ? nullptr : scheduled_queue[core_id].front(); + Thread* winner = nullptr; + for (auto& thread : suggested_queue[core_id]) { + const s32 source_core = thread->GetProcessorID(); + if (thread->GetPriority() != priority) { + continue; + } + if (source_core >= 0) { + Thread* next_thread = scheduled_queue[source_core].empty() + ? 
nullptr + : scheduled_queue[source_core].front(); + if (next_thread != nullptr && next_thread->GetPriority() < 2) { + break; + } + if (next_thread == thread) { + continue; + } + } + if (current_thread != nullptr && + current_thread->GetLastRunningTicks() >= thread->GetLastRunningTicks()) { + winner = thread; + break; + } + } + + if (winner != nullptr) { + if (winner->IsRunning()) { + UnloadThread(winner->GetProcessorID()); + } + TransferToCore(winner->GetPriority(), core_id, winner); + current_thread = winner->GetPriority() <= current_thread->GetPriority() ? winner : current_thread; + } + + if (current_thread != nullptr && current_thread->GetPriority() > priority) { + for (auto& thread : suggested_queue[core_id]) { + const s32 source_core = thread->GetProcessorID(); + if (thread->GetPriority() > priority) { + continue; + } + if (source_core >= 0) { + Thread* next_thread = scheduled_queue[source_core].empty() + ? nullptr + : scheduled_queue[source_core].front(); + if (next_thread != nullptr && next_thread->GetPriority() < 2) { + break; + } + if (next_thread == thread) { + continue; + } + } + if (current_thread != nullptr && + current_thread->GetLastRunningTicks() >= thread->GetLastRunningTicks()) { + winner = thread; + break; + } + } + + if (winner != nullptr) { + if (winner->IsRunning()) { + UnloadThread(winner->GetProcessorID()); + } + TransferToCore(winner->GetPriority(), core_id, winner); + current_thread = winner; + } + } + + reselection_pending.store(true, std::memory_order_release); } } @@ -260,9 +333,7 @@ void GlobalScheduler::SchedulePrepend(u32 priority, u32 core, Thread* thread) { bool GlobalScheduler::AskForReselectionOrMarkRedundant(Thread* current_thread, Thread* winner) { if (current_thread == winner) { - // TODO(blinkhawk): manage redundant operations, this is not implemented. - // as its mostly an optimization. - // current_thread->SetRedundantSchedulerOperation(); + current_thread->IncrementYieldCount(); return true; } else { reselection_pending.store(true, std::memory_order_release); diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h index 88255099f..bec23a0e0 100644 --- a/src/core/hle/kernel/thread.h +++ b/src/core/hle/kernel/thread.h @@ -416,6 +416,14 @@ public: /// Yields this thread and if the core is left idle, loads are rebalanced bool YieldAndWaitForLoadBalancing(); + void IncrementYieldCount() { + yield_count++; + } + + u64 GetYieldCount() const { + return yield_count; + } + ThreadSchedStatus GetSchedulingStatus() const { return static_cast(scheduling_state & ThreadSchedMasks::LowMask); } @@ -460,6 +468,7 @@ private: u64 total_cpu_time_ticks = 0; ///< Total CPU running ticks. u64 last_running_ticks = 0; ///< CPU tick when thread was last running + u64 yield_count = 0; ///< Number of innecessaries yields occured. 
s32 processor_id = 0; From e05a8c2385a68be6b1f6079c656fa46336546927 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Wed, 11 Sep 2019 12:47:37 -0400 Subject: [PATCH 15/29] Kernel: Remove global system accessor from WaitObject --- src/core/hle/kernel/kernel.cpp | 8 ++++++++ src/core/hle/kernel/kernel.h | 6 ++++++ src/core/hle/kernel/scheduler.cpp | 2 +- src/core/hle/kernel/wait_object.cpp | 3 ++- 4 files changed, 17 insertions(+), 2 deletions(-) diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp index 7a913520d..77edbcd1f 100644 --- a/src/core/hle/kernel/kernel.cpp +++ b/src/core/hle/kernel/kernel.cpp @@ -229,6 +229,14 @@ const Kernel::GlobalScheduler& KernelCore::GlobalScheduler() const { return impl->global_scheduler; } +Core::System& KernelCore::System() { + return impl->system; +} + +const Core::System& KernelCore::System() const { + return impl->system; +} + void KernelCore::AddNamedPort(std::string name, SharedPtr port) { impl->named_ports.emplace(std::move(name), std::move(port)); } diff --git a/src/core/hle/kernel/kernel.h b/src/core/hle/kernel/kernel.h index f9f5bdc88..0fc4d1f36 100644 --- a/src/core/hle/kernel/kernel.h +++ b/src/core/hle/kernel/kernel.h @@ -82,6 +82,12 @@ public: /// Gets the sole instance of the global scheduler const Kernel::GlobalScheduler& GlobalScheduler() const; + /// Gets the sole instance of the system + Core::System& System(); + + /// Gets the sole instance of the system + const Core::System& System() const; + /// Adds a port to the named port table void AddNamedPort(std::string name, SharedPtr port); diff --git a/src/core/hle/kernel/scheduler.cpp b/src/core/hle/kernel/scheduler.cpp index 5581c43bf..60d936c9a 100644 --- a/src/core/hle/kernel/scheduler.cpp +++ b/src/core/hle/kernel/scheduler.cpp @@ -287,7 +287,7 @@ void GlobalScheduler::PreemptThreads() { if (current_thread != nullptr && current_thread->GetPriority() > priority) { for (auto& thread : suggested_queue[core_id]) { const s32 source_core = thread->GetProcessorID(); - if (thread->GetPriority() > priority) { + if (thread->GetPriority() < priority) { continue; } if (source_core >= 0) { diff --git a/src/core/hle/kernel/wait_object.cpp b/src/core/hle/kernel/wait_object.cpp index a65ec7dbc..50ed2a2f1 100644 --- a/src/core/hle/kernel/wait_object.cpp +++ b/src/core/hle/kernel/wait_object.cpp @@ -8,6 +8,7 @@ #include "common/logging/log.h" #include "core/core.h" #include "core/core_cpu.h" +#include "core/hle/kernel/kernel.h" #include "core/hle/kernel/object.h" #include "core/hle/kernel/process.h" #include "core/hle/kernel/thread.h" @@ -97,7 +98,7 @@ void WaitObject::WakeupWaitingThread(SharedPtr thread) { } if (resume) { thread->ResumeFromWait(); - Core::System::GetInstance().PrepareReschedule(thread->GetProcessorID()); + kernel.System().PrepareReschedule(thread->GetProcessorID()); } } From 1ec1e8137356c64d624d90cd67acebb10f056abd Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Mon, 30 Sep 2019 20:50:59 -0400 Subject: [PATCH 16/29] Kernel: Clang Format --- src/core/hle/kernel/scheduler.cpp | 3 ++- src/core/hle/kernel/thread.h | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/core/hle/kernel/scheduler.cpp b/src/core/hle/kernel/scheduler.cpp index 60d936c9a..226d15d88 100644 --- a/src/core/hle/kernel/scheduler.cpp +++ b/src/core/hle/kernel/scheduler.cpp @@ -281,7 +281,8 @@ void GlobalScheduler::PreemptThreads() { UnloadThread(winner->GetProcessorID()); } TransferToCore(winner->GetPriority(), core_id, winner); - current_thread = 
winner->GetPriority() <= current_thread->GetPriority() ? winner : current_thread; + current_thread = + winner->GetPriority() <= current_thread->GetPriority() ? winner : current_thread; } if (current_thread != nullptr && current_thread->GetPriority() > priority) { diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h index bec23a0e0..4d220c4f9 100644 --- a/src/core/hle/kernel/thread.h +++ b/src/core/hle/kernel/thread.h @@ -468,7 +468,7 @@ private: u64 total_cpu_time_ticks = 0; ///< Total CPU running ticks. u64 last_running_ticks = 0; ///< CPU tick when thread was last running - u64 yield_count = 0; ///< Number of innecessaries yields occured. + u64 yield_count = 0; ///< Number of innecessaries yields occured. s32 processor_id = 0; From 44e09e5f21915391672558940842b92e3a64cb1b Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Mon, 7 Oct 2019 18:57:13 -0400 Subject: [PATCH 17/29] Kernel: Correct Results in Condition Variables and Mutexes --- src/core/hle/kernel/kernel.cpp | 13 +++++++------ src/core/hle/kernel/mutex.cpp | 1 + src/core/hle/kernel/svc.cpp | 27 +++++++++------------------ 3 files changed, 17 insertions(+), 24 deletions(-) diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp index 77edbcd1f..002c5af2b 100644 --- a/src/core/hle/kernel/kernel.cpp +++ b/src/core/hle/kernel/kernel.cpp @@ -15,6 +15,7 @@ #include "core/core_timing_util.h" #include "core/hle/kernel/address_arbiter.h" #include "core/hle/kernel/client_port.h" +#include "core/hle/kernel/errors.h" #include "core/hle/kernel/handle_table.h" #include "core/hle/kernel/kernel.h" #include "core/hle/kernel/process.h" @@ -60,12 +61,8 @@ static void ThreadWakeupCallback(u64 thread_handle, [[maybe_unused]] s64 cycles_ if (thread->HasWakeupCallback()) { resume = thread->InvokeWakeupCallback(ThreadWakeupReason::Timeout, thread, nullptr, 0); } - } - - if (thread->GetMutexWaitAddress() != 0 || thread->GetCondVarWaitAddress() != 0 || - thread->GetWaitHandle() != 0) { - ASSERT(thread->GetStatus() == ThreadStatus::WaitMutex || - thread->GetStatus() == ThreadStatus::WaitCondVar); + } else if (thread->GetStatus() == ThreadStatus::WaitMutex || + thread->GetStatus() == ThreadStatus::WaitCondVar) { thread->SetMutexWaitAddress(0); thread->SetCondVarWaitAddress(0); thread->SetWaitHandle(0); @@ -85,6 +82,10 @@ static void ThreadWakeupCallback(u64 thread_handle, [[maybe_unused]] s64 cycles_ } if (resume) { + if (thread->GetStatus() == ThreadStatus::WaitCondVar || + thread->GetStatus() == ThreadStatus::WaitArb) { + thread->SetWaitSynchronizationResult(RESULT_TIMEOUT); + } thread->ResumeFromWait(); } } diff --git a/src/core/hle/kernel/mutex.cpp b/src/core/hle/kernel/mutex.cpp index eb919246c..663d0f4b6 100644 --- a/src/core/hle/kernel/mutex.cpp +++ b/src/core/hle/kernel/mutex.cpp @@ -139,6 +139,7 @@ ResultCode Mutex::Release(VAddr address) { thread->SetCondVarWaitAddress(0); thread->SetMutexWaitAddress(0); thread->SetWaitHandle(0); + thread->SetWaitSynchronizationResult(RESULT_SUCCESS); system.PrepareReschedule(); diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp index bd67fc96d..823d1d403 100644 --- a/src/core/hle/kernel/svc.cpp +++ b/src/core/hle/kernel/svc.cpp @@ -1677,18 +1677,20 @@ static ResultCode SignalProcessWideKey(Core::System& system, VAddr condition_var // Atomically read the value of the mutex. 
u32 mutex_val = 0; + u32 update_val = 0; + const VAddr mutex_address = thread->GetMutexWaitAddress(); do { - monitor.SetExclusive(current_core, thread->GetMutexWaitAddress()); + monitor.SetExclusive(current_core, mutex_address); // If the mutex is not yet acquired, acquire it. - mutex_val = Memory::Read32(thread->GetMutexWaitAddress()); + mutex_val = Memory::Read32(mutex_address); if (mutex_val != 0) { - monitor.ClearExclusive(); - break; + update_val = mutex_val | Mutex::MutexHasWaitersFlag; + } else { + update_val = thread->GetWaitHandle(); } - } while (!monitor.ExclusiveWrite32(current_core, thread->GetMutexWaitAddress(), - thread->GetWaitHandle())); + } while (!monitor.ExclusiveWrite32(current_core, mutex_address, update_val)); if (mutex_val == 0) { // We were able to acquire the mutex, resume this thread. ASSERT(thread->GetStatus() == ThreadStatus::WaitCondVar); @@ -1702,20 +1704,9 @@ static ResultCode SignalProcessWideKey(Core::System& system, VAddr condition_var thread->SetLockOwner(nullptr); thread->SetMutexWaitAddress(0); thread->SetWaitHandle(0); + thread->SetWaitSynchronizationResult(RESULT_SUCCESS); system.PrepareReschedule(thread->GetProcessorID()); } else { - // Atomically signal that the mutex now has a waiting thread. - do { - monitor.SetExclusive(current_core, thread->GetMutexWaitAddress()); - - // Ensure that the mutex value is still what we expect. - u32 value = Memory::Read32(thread->GetMutexWaitAddress()); - // TODO(Subv): When this happens, the kernel just clears the exclusive state and - // retries the initial read for this thread. - ASSERT_MSG(mutex_val == value, "Unhandled synchronization primitive case"); - } while (!monitor.ExclusiveWrite32(current_core, thread->GetMutexWaitAddress(), - mutex_val | Mutex::MutexHasWaitersFlag)); - // The mutex is already owned by some other thread, make this thread wait on it. const Handle owner_handle = static_cast(mutex_val & Mutex::MutexOwnerMask); const auto& handle_table = system.Kernel().CurrentProcess()->GetHandleTable(); From 71768571775ff989a577a773574f5f5bdeb14d33 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Mon, 7 Oct 2019 19:09:57 -0400 Subject: [PATCH 18/29] Kernel: Corrections to ModifyByWaitingCountAndSignalToAddressIfEqual --- src/core/hle/kernel/address_arbiter.cpp | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/src/core/hle/kernel/address_arbiter.cpp b/src/core/hle/kernel/address_arbiter.cpp index 4c1d3fd18..de0a9064e 100644 --- a/src/core/hle/kernel/address_arbiter.cpp +++ b/src/core/hle/kernel/address_arbiter.cpp @@ -91,12 +91,20 @@ ResultCode AddressArbiter::ModifyByWaitingCountAndSignalToAddressIfEqual(VAddr a // Determine the modified value depending on the waiting count. 
s32 updated_value; - if (waiting_threads.empty()) { - updated_value = value + 1; - } else if (num_to_wake <= 0 || waiting_threads.size() <= static_cast(num_to_wake)) { - updated_value = value - 1; + if (num_to_wake <= 0) { + if (waiting_threads.empty()) { + updated_value = value + 1; + } else { + updated_value = value - 1; + } } else { - updated_value = value; + if (waiting_threads.empty()) { + updated_value = value + 1; + } else if (waiting_threads.size() <= static_cast(num_to_wake)) { + updated_value = value - 1; + } else { + updated_value = value; + } } if (static_cast(Memory::Read32(address)) != value) { From 27d571c08436e7131f67fed2771434a571c1e976 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 8 Oct 2019 18:35:04 -0400 Subject: [PATCH 19/29] Kernel: Correct redundant yields to only advance time forward. --- src/core/hle/kernel/svc.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp index 823d1d403..101f72b7d 100644 --- a/src/core/hle/kernel/svc.cpp +++ b/src/core/hle/kernel/svc.cpp @@ -1577,10 +1577,12 @@ static void SleepThread(Core::System& system, s64 nanoseconds) { } if (redundant) { - system.CoreTiming().Idle(); - } else { - system.PrepareReschedule(current_thread->GetProcessorID()); + // If it's redundant, the core is pretty much idle. Some games keep idling + // a core while it's doing nothing, we advance timing to avoid costly continuos + // calls. + system.CoreTiming().AddTicks(2000); } + system.PrepareReschedule(current_thread->GetProcessorID()); } /// Wait process wide key atomic From 1c6a11ab142d18c3444629940f183b7c1865a5e2 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Thu, 10 Oct 2019 08:04:14 -0400 Subject: [PATCH 20/29] Kernel: Corrections to Wait Objects clearing in which a thread could still be signalled after a timeout or a cancel. 
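The waiting-count branching introduced in [PATCH 18/29] above collapses to a small decision table: with no waiters the value is incremented, recording the missed signal; it is decremented when every current waiter will be woken, either because num_to_wake <= 0 (conventionally "wake all") or because at least as many wakes as waiters were requested; and it is left untouched when some waiters will remain. A condensed sketch of that logic in isolation, assuming nothing beyond the standard library:

    #include <cstddef>
    #include <cstdint>

    // Condenses the branching in ModifyByWaitingCountAndSignalToAddressIfEqual:
    // returns the value to write back before waking any threads.
    std::int32_t UpdatedArbiterValue(std::int32_t value, std::size_t num_waiters,
                                     std::int32_t num_to_wake) {
        if (num_waiters == 0) {
            return value + 1; // Nobody is waiting: record the missed signal.
        }
        if (num_to_wake <= 0 ||
            num_waiters <= static_cast<std::size_t>(num_to_wake)) {
            return value - 1; // Every current waiter will be woken.
        }
        return value; // Some waiters will remain: leave the value unchanged.
    }

The nested form in the patch and this flattened form compute the same result in every case; the patch merely keeps the num_to_wake <= 0 path textually separate.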
--- src/core/hle/kernel/thread.cpp | 1 + src/core/hle/kernel/thread.h | 3 +++ src/core/hle/kernel/wait_object.cpp | 3 --- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp index ae62609e3..563a99bfc 100644 --- a/src/core/hle/kernel/thread.cpp +++ b/src/core/hle/kernel/thread.cpp @@ -133,6 +133,7 @@ void Thread::ResumeFromWait() { void Thread::CancelWait() { ASSERT(GetStatus() == ThreadStatus::WaitSynch); + ClearWaitObjects(); SetWaitSynchronizationResult(ERR_SYNCHRONIZATION_CANCELED); ResumeFromWait(); } diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h index 4d220c4f9..ceb4d5159 100644 --- a/src/core/hle/kernel/thread.h +++ b/src/core/hle/kernel/thread.h @@ -319,6 +319,9 @@ public: } void ClearWaitObjects() { + for (const auto& waiting_object : wait_objects) { + waiting_object->RemoveWaitingThread(this); + } wait_objects.clear(); } diff --git a/src/core/hle/kernel/wait_object.cpp b/src/core/hle/kernel/wait_object.cpp index 50ed2a2f1..0f833fb3a 100644 --- a/src/core/hle/kernel/wait_object.cpp +++ b/src/core/hle/kernel/wait_object.cpp @@ -85,9 +85,6 @@ void WaitObject::WakeupWaitingThread(SharedPtr thread) { const std::size_t index = thread->GetWaitObjectIndex(this); - for (const auto& object : thread->GetWaitObjects()) { - object->RemoveWaitingThread(thread.get()); - } thread->ClearWaitObjects(); thread->CancelWakeupTimer(); From 96b1b144afff4ae4dd2d33547b8a62c46c920a84 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Thu, 10 Oct 2019 08:50:41 -0400 Subject: [PATCH 21/29] Kernel: Correct Paused scheduling --- src/core/hle/kernel/thread.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp index 563a99bfc..0871a2f00 100644 --- a/src/core/hle/kernel/thread.cpp +++ b/src/core/hle/kernel/thread.cpp @@ -354,9 +354,7 @@ void Thread::SetActivity(ThreadActivity value) { if (value == ThreadActivity::Paused) { // Set status if not waiting - if (status == ThreadStatus::Ready) { - status = ThreadStatus::Paused; - } else if (status == ThreadStatus::Running) { + if (status == ThreadStatus::Ready || status == ThreadStatus::Running) { SetStatus(ThreadStatus::Paused); Core::System::GetInstance().CpuCore(processor_id).PrepareReschedule(); } From 0b72b34d89d8e3dd06fadfded728f7202bc34741 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sat, 12 Oct 2019 07:57:32 -0400 Subject: [PATCH 22/29] KernelSVC: Assert that condition variable address is aligned to 4 bytes. 
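The fix in [PATCH 20/29] above closes a stale-wakeup hole: previously only the wakeup path removed a thread from each wait object's waiter list, so a timeout or CancelWait() left dangling back-references through which the thread could be signalled again. Moving the unlink into Thread::ClearWaitObjects() makes every exit path symmetric. A minimal sketch of the two-way unlink, with hypothetical Waiter/Waitable types standing in for Thread/WaitObject:

    #include <algorithm>
    #include <vector>

    struct Waiter;

    // Stand-in for WaitObject: keeps back-references to its waiting threads.
    struct Waitable {
        std::vector<Waiter*> waiting;
        void RemoveWaiter(Waiter* waiter) {
            waiting.erase(std::remove(waiting.begin(), waiting.end(), waiter),
                          waiting.end());
        }
    };

    // Stand-in for Thread.
    struct Waiter {
        std::vector<Waitable*> wait_objects;
        // Mirrors Thread::ClearWaitObjects(): unlink both directions so a
        // later signal on any of these objects cannot touch this waiter.
        void ClearWaitObjects() {
            for (Waitable* object : wait_objects) {
                object->RemoveWaiter(this);
            }
            wait_objects.clear();
        }
    };

With the unlink centralized in the thread, WaitObject::WakeupWaitingThread no longer needs its own removal loop, which is exactly the loop the wait_object.cpp hunk deletes.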
--- src/core/hle/kernel/svc.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp index 101f72b7d..151acf33a 100644 --- a/src/core/hle/kernel/svc.cpp +++ b/src/core/hle/kernel/svc.cpp @@ -1608,6 +1608,8 @@ static ResultCode WaitProcessWideKeyAtomic(Core::System& system, VAddr mutex_add return ERR_INVALID_ADDRESS; } + ASSERT(condition_variable_addr == Common::AlignDown(condition_variable_addr, 4)); + auto* const current_process = system.Kernel().CurrentProcess(); const auto& handle_table = current_process->GetHandleTable(); SharedPtr thread = handle_table.Get(thread_handle); @@ -1639,6 +1641,8 @@ static ResultCode SignalProcessWideKey(Core::System& system, VAddr condition_var LOG_TRACE(Kernel_SVC, "called, condition_variable_addr=0x{:X}, target=0x{:08X}", condition_variable_addr, target); + ASSERT(condition_variable_addr == Common::AlignDown(condition_variable_addr, 4)); + // Retrieve a list of all threads that are waiting for this condition variable. std::vector> waiting_threads; const auto& scheduler = system.GlobalScheduler(); From b3c1deba494d78158ea6764802880b249fe64416 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sat, 12 Oct 2019 08:02:34 -0400 Subject: [PATCH 23/29] Kernel_Thread: Eliminate most global accessors. --- src/core/hle/kernel/thread.cpp | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp index 0871a2f00..7208bbb11 100644 --- a/src/core/hle/kernel/thread.cpp +++ b/src/core/hle/kernel/thread.cpp @@ -41,8 +41,8 @@ Thread::~Thread() = default; void Thread::Stop() { // Cancel any outstanding wakeup events for this thread - Core::System::GetInstance().CoreTiming().UnscheduleEvent(kernel.ThreadWakeupCallbackEventType(), - callback_handle); + kernel.System().CoreTiming().UnscheduleEvent(kernel.ThreadWakeupCallbackEventType(), + callback_handle); kernel.ThreadWakeupCallbackHandleTable().Close(callback_handle); callback_handle = 0; SetStatus(ThreadStatus::Dead); @@ -68,13 +68,13 @@ void Thread::WakeAfterDelay(s64 nanoseconds) { // This function might be called from any thread so we have to be cautious and use the // thread-safe version of ScheduleEvent. 
const s64 cycles = Core::Timing::nsToCycles(std::chrono::nanoseconds{nanoseconds}); - Core::System::GetInstance().CoreTiming().ScheduleEvent( - cycles, kernel.ThreadWakeupCallbackEventType(), callback_handle); + kernel.System().CoreTiming().ScheduleEvent(cycles, kernel.ThreadWakeupCallbackEventType(), + callback_handle); } void Thread::CancelWakeupTimer() { - Core::System::GetInstance().CoreTiming().UnscheduleEvent(kernel.ThreadWakeupCallbackEventType(), - callback_handle); + kernel.System().CoreTiming().UnscheduleEvent(kernel.ThreadWakeupCallbackEventType(), + callback_handle); } static std::optional GetNextProcessorId(u64 mask) { @@ -176,7 +176,7 @@ ResultVal> Thread::Create(KernelCore& kernel, std::string name return ResultCode(-1); } - auto& system = Core::System::GetInstance(); + auto& system = kernel.System(); SharedPtr thread(new Thread(kernel)); thread->thread_id = kernel.CreateNewThreadID(); @@ -258,7 +258,7 @@ void Thread::SetStatus(ThreadStatus new_status) { } if (status == ThreadStatus::Running) { - last_running_ticks = Core::System::GetInstance().CoreTiming().GetTicks(); + last_running_ticks = kernel.System().CoreTiming().GetTicks(); } status = new_status; @@ -356,7 +356,7 @@ void Thread::SetActivity(ThreadActivity value) { // Set status if not waiting if (status == ThreadStatus::Ready || status == ThreadStatus::Running) { SetStatus(ThreadStatus::Paused); - Core::System::GetInstance().CpuCore(processor_id).PrepareReschedule(); + kernel.System().CpuCore(processor_id).PrepareReschedule(); } } else if (status == ThreadStatus::Paused) { // Ready to reschedule @@ -476,7 +476,7 @@ void Thread::AdjustSchedulingOnPriority(u32 old_priority) { if (GetSchedulingStatus() != ThreadSchedStatus::Runnable) { return; } - auto& scheduler = Core::System::GetInstance().GlobalScheduler(); + auto& scheduler = kernel.System().GlobalScheduler(); if (processor_id >= 0) { scheduler.Unschedule(old_priority, processor_id, this); } @@ -508,7 +508,7 @@ void Thread::AdjustSchedulingOnPriority(u32 old_priority) { } void Thread::AdjustSchedulingOnAffinity(u64 old_affinity_mask, s32 old_core) { - auto& scheduler = Core::System::GetInstance().GlobalScheduler(); + auto& scheduler = kernel.System().GlobalScheduler(); if (GetSchedulingStatus() != ThreadSchedStatus::Runnable || current_priority >= THREADPRIO_COUNT) { return; From 25f8606a6dab595eb7a92fce9be32e0489079964 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sat, 12 Oct 2019 08:21:51 -0400 Subject: [PATCH 24/29] Kernel Scheduler: Make sure the global scheduler shutdowns correctly. --- src/common/multi_level_queue.h | 7 +++++++ src/core/core_cpu.cpp | 4 ++++ src/core/core_cpu.h | 2 ++ src/core/cpu_core_manager.cpp | 1 + src/core/hle/kernel/kernel.cpp | 2 ++ src/core/hle/kernel/scheduler.cpp | 8 ++++++++ src/core/hle/kernel/scheduler.h | 7 +++++++ 7 files changed, 31 insertions(+) diff --git a/src/common/multi_level_queue.h b/src/common/multi_level_queue.h index 9cb448f56..50acfdbf2 100644 --- a/src/common/multi_level_queue.h +++ b/src/common/multi_level_queue.h @@ -304,6 +304,13 @@ public: return levels[priority == Depth ? 
63 : priority].back(); } + void clear() { + used_priorities = 0; + for (std::size_t i = 0; i < Depth; i++) { + levels[i].clear(); + } + } + private: using const_list_iterator = typename std::list::const_iterator; diff --git a/src/core/core_cpu.cpp b/src/core/core_cpu.cpp index a6f63e437..233ea572c 100644 --- a/src/core/core_cpu.cpp +++ b/src/core/core_cpu.cpp @@ -117,4 +117,8 @@ void Cpu::Reschedule() { scheduler->TryDoContextSwitch(); } +void Cpu::Shutdown() { + scheduler->Shutdown(); +} + } // namespace Core diff --git a/src/core/core_cpu.h b/src/core/core_cpu.h index 80261daf7..cafca8df7 100644 --- a/src/core/core_cpu.h +++ b/src/core/core_cpu.h @@ -84,6 +84,8 @@ public: return core_index; } + void Shutdown(); + static std::unique_ptr MakeExclusiveMonitor(std::size_t num_cores); private: diff --git a/src/core/cpu_core_manager.cpp b/src/core/cpu_core_manager.cpp index 16b384076..8efd410bb 100644 --- a/src/core/cpu_core_manager.cpp +++ b/src/core/cpu_core_manager.cpp @@ -58,6 +58,7 @@ void CpuCoreManager::Shutdown() { thread_to_cpu.clear(); for (auto& cpu_core : cores) { + cpu_core->Shutdown(); cpu_core.reset(); } diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp index 002c5af2b..0d6286f84 100644 --- a/src/core/hle/kernel/kernel.cpp +++ b/src/core/hle/kernel/kernel.cpp @@ -116,6 +116,8 @@ struct KernelCore::Impl { thread_wakeup_event_type = nullptr; preemption_event = nullptr; + global_scheduler.Shutdown(); + named_ports.clear(); } diff --git a/src/core/hle/kernel/scheduler.cpp b/src/core/hle/kernel/scheduler.cpp index 226d15d88..122106267 100644 --- a/src/core/hle/kernel/scheduler.cpp +++ b/src/core/hle/kernel/scheduler.cpp @@ -342,6 +342,14 @@ bool GlobalScheduler::AskForReselectionOrMarkRedundant(Thread* current_thread, T } } +void GlobalScheduler::Shutdown() { + for (std::size_t core = 0; core < NUM_CPU_CORES; core++) { + scheduled_queue[core].clear(); + suggested_queue[core].clear(); + } + thread_list.clear(); +} + GlobalScheduler::~GlobalScheduler() = default; Scheduler::Scheduler(Core::System& system, Core::ARM_Interface& cpu_core, u32 core_id) diff --git a/src/core/hle/kernel/scheduler.h b/src/core/hle/kernel/scheduler.h index 408e20c88..617553ae3 100644 --- a/src/core/hle/kernel/scheduler.h +++ b/src/core/hle/kernel/scheduler.h @@ -147,6 +147,8 @@ public: return reselection_pending.load(); } + void Shutdown(); + private: bool AskForReselectionOrMarkRedundant(Thread* current_thread, Thread* winner); @@ -189,6 +191,11 @@ public: return context_switch_pending; } + void Shutdown() { + current_thread = nullptr; + selected_thread = nullptr; + } + private: friend class GlobalScheduler; /** From 3073615dbc214a53badc88da68eecbaaa73898de Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sat, 12 Oct 2019 10:13:25 -0400 Subject: [PATCH 25/29] Kernel: Address Feedback. 
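[PATCH 24/29] above is about breaking ownership chains at teardown: GlobalScheduler's thread_list holds the strong SharedPtr references, and its per-core queues plus each Scheduler's current/selected pointers keep threads reachable, so all of them must be cleared before shutdown can destroy the threads. A condensed sketch of the shape of that teardown; std::shared_ptr stands in for the kernel's SharedPtr, and the four-core array and call order here are illustrative (in yuzu they are driven by CpuCoreManager and KernelCore):

    #include <array>
    #include <memory>
    #include <vector>

    struct Thread {};

    // Per-core scheduler: drops its thread pointers, as Scheduler::Shutdown() does.
    struct CoreScheduler {
        Thread* current_thread = nullptr;
        Thread* selected_thread = nullptr;
        void Shutdown() {
            current_thread = nullptr;
            selected_thread = nullptr;
        }
    };

    // Global scheduler: owns the threads, as GlobalScheduler::Shutdown() does
    // (which additionally clears the scheduled/suggested queues per core).
    struct GlobalSched {
        std::vector<std::shared_ptr<Thread>> thread_list;
        void Shutdown() {
            thread_list.clear(); // Last owning references dropped here.
        }
    };

    void ShutdownAll(std::array<CoreScheduler, 4>& cores, GlobalSched& global) {
        for (CoreScheduler& core : cores) {
            core.Shutdown(); // Cores release their views of the threads first...
        }
        global.Shutdown();   // ...then the owner lets the Thread objects die.
    }

The new multi_level_queue::clear() exists for the same reason: the priority queues inside GlobalScheduler keep threads reachable per priority level, so Shutdown() empties them core by core.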
--- src/core/hle/kernel/kernel.h | 2 +- src/core/hle/kernel/scheduler.cpp | 53 ++++++++++++++++++++++--- src/core/hle/kernel/scheduler.h | 65 ++++++++++++------------------- src/core/hle/kernel/svc.cpp | 12 +++--- src/core/hle/kernel/thread.cpp | 17 ++++---- src/core/hle/kernel/thread.h | 16 ++++++-- 6 files changed, 98 insertions(+), 67 deletions(-) diff --git a/src/core/hle/kernel/kernel.h b/src/core/hle/kernel/kernel.h index 0fc4d1f36..9fb8f52ec 100644 --- a/src/core/hle/kernel/kernel.h +++ b/src/core/hle/kernel/kernel.h @@ -21,11 +21,11 @@ namespace Kernel { class AddressArbiter; class ClientPort; +class GlobalScheduler; class HandleTable; class Process; class ResourceLimit; class Thread; -class GlobalScheduler; /// Represents a single instance of the kernel. class KernelCore { diff --git a/src/core/hle/kernel/scheduler.cpp b/src/core/hle/kernel/scheduler.cpp index 122106267..dabeb05d6 100644 --- a/src/core/hle/kernel/scheduler.cpp +++ b/src/core/hle/kernel/scheduler.cpp @@ -23,7 +23,7 @@ namespace Kernel { GlobalScheduler::GlobalScheduler(Core::System& system) : system{system} { - reselection_pending = false; + is_reselection_pending = false; } void GlobalScheduler::AddThread(SharedPtr thread) { @@ -61,7 +61,7 @@ void GlobalScheduler::SelectThread(u32 core) { } sched.selected_thread = thread; } - sched.context_switch_pending = sched.selected_thread != sched.current_thread; + sched.is_context_switch_pending = sched.selected_thread != sched.current_thread; std::atomic_thread_fence(std::memory_order_seq_cst); }; Scheduler& sched = system.Scheduler(core); @@ -318,10 +318,18 @@ void GlobalScheduler::PreemptThreads() { } } - reselection_pending.store(true, std::memory_order_release); + is_reselection_pending.store(true, std::memory_order_release); } } +void GlobalScheduler::Suggest(u32 priority, u32 core, Thread* thread) { + suggested_queue[core].add(thread, priority); +} + +void GlobalScheduler::Unsuggest(u32 priority, u32 core, Thread* thread) { + suggested_queue[core].remove(thread, priority); +} + void GlobalScheduler::Schedule(u32 priority, u32 core, Thread* thread) { ASSERT_MSG(thread->GetProcessorID() == core, "Thread must be assigned to this core."); scheduled_queue[core].add(thread, priority); @@ -332,12 +340,40 @@ void GlobalScheduler::SchedulePrepend(u32 priority, u32 core, Thread* thread) { scheduled_queue[core].add(thread, priority, false); } +void GlobalScheduler::Reschedule(u32 priority, u32 core, Thread* thread) { + scheduled_queue[core].remove(thread, priority); + scheduled_queue[core].add(thread, priority); +} + +void GlobalScheduler::Unschedule(u32 priority, u32 core, Thread* thread) { + scheduled_queue[core].remove(thread, priority); +} + +void GlobalScheduler::TransferToCore(u32 priority, s32 destination_core, Thread* thread) { + const bool schedulable = thread->GetPriority() < THREADPRIO_COUNT; + const s32 source_core = thread->GetProcessorID(); + if (source_core == destination_core || !schedulable) { + return; + } + thread->SetProcessorID(destination_core); + if (source_core >= 0) { + Unschedule(priority, source_core, thread); + } + if (destination_core >= 0) { + Unsuggest(priority, destination_core, thread); + Schedule(priority, destination_core, thread); + } + if (source_core >= 0) { + Suggest(priority, source_core, thread); + } +} + bool GlobalScheduler::AskForReselectionOrMarkRedundant(Thread* current_thread, Thread* winner) { if (current_thread == winner) { current_thread->IncrementYieldCount(); return true; } else { - reselection_pending.store(true, 
std::memory_order_release); + is_reselection_pending.store(true, std::memory_order_release); return false; } } @@ -378,7 +414,7 @@ u64 Scheduler::GetLastContextSwitchTicks() const { } void Scheduler::TryDoContextSwitch() { - if (context_switch_pending) { + if (is_context_switch_pending ) { SwitchContext(); } } @@ -409,7 +445,7 @@ void Scheduler::SwitchContext() { Thread* const previous_thread = GetCurrentThread(); Thread* const new_thread = GetSelectedThread(); - context_switch_pending = false; + is_context_switch_pending = false; if (new_thread == previous_thread) { return; } @@ -477,4 +513,9 @@ void Scheduler::UpdateLastContextSwitchTime(Thread* thread, Process* process) { last_context_switch_time = most_recent_switch_ticks; } +void Scheduler::Shutdown() { + current_thread = nullptr; + selected_thread = nullptr; +} + } // namespace Kernel diff --git a/src/core/hle/kernel/scheduler.h b/src/core/hle/kernel/scheduler.h index 617553ae3..fcae28e0a 100644 --- a/src/core/hle/kernel/scheduler.h +++ b/src/core/hle/kernel/scheduler.h @@ -39,15 +39,11 @@ public: // Add a thread to the suggested queue of a cpu core. Suggested threads may be // picked if no thread is scheduled to run on the core. - void Suggest(u32 priority, u32 core, Thread* thread) { - suggested_queue[core].add(thread, priority); - } + void Suggest(u32 priority, u32 core, Thread* thread); // Remove a thread to the suggested queue of a cpu core. Suggested threads may be // picked if no thread is scheduled to run on the core. - void Unsuggest(u32 priority, u32 core, Thread* thread) { - suggested_queue[core].remove(thread, priority); - } + void Unsuggest(u32 priority, u32 core, Thread* thread); // Add a thread to the scheduling queue of a cpu core. The thread is added at the // back the queue in its priority level @@ -58,37 +54,15 @@ public: void SchedulePrepend(u32 priority, u32 core, Thread* thread); // Reschedule an already scheduled thread based on a new priority - void Reschedule(u32 priority, u32 core, Thread* thread) { - scheduled_queue[core].remove(thread, priority); - scheduled_queue[core].add(thread, priority); - } + void Reschedule(u32 priority, u32 core, Thread* thread); // Unschedule a thread. - void Unschedule(u32 priority, u32 core, Thread* thread) { - scheduled_queue[core].remove(thread, priority); - } + void Unschedule(u32 priority, u32 core, Thread* thread); // Transfers a thread into an specific core. If the destination_core is -1 // it will be unscheduled from its source code and added into its suggested // queue. - void TransferToCore(u32 priority, s32 destination_core, Thread* thread) { - const bool schedulable = thread->GetPriority() < THREADPRIO_COUNT; - const s32 source_core = thread->GetProcessorID(); - if (source_core == destination_core || !schedulable) { - return; - } - thread->SetProcessorID(destination_core); - if (source_core >= 0) { - Unschedule(priority, source_core, thread); - } - if (destination_core >= 0) { - Unsuggest(priority, destination_core, thread); - Schedule(priority, destination_core, thread); - } - if (source_core >= 0) { - Suggest(priority, source_core, thread); - } - } + void TransferToCore(u32 priority, s32 destination_core, Thread* thread); /* * UnloadThread selects a core and forces it to unload its current thread's context @@ -133,6 +107,12 @@ public: */ bool YieldThreadAndWaitForLoadBalancing(Thread* thread); + /* + * PreemptThreads this operation rotates the scheduling queues of threads at + * a preemption priority and then does some core rebalancing. 
Preemption priorities + * can be found in the array 'preemption_priorities'. This operation happens + * every 10ms. + */ void PreemptThreads(); u32 CpuCoresCount() const { @@ -140,11 +120,11 @@ public: } void SetReselectionPending() { - reselection_pending.store(true, std::memory_order_release); + is_reselection_pending.store(true, std::memory_order_release); } bool IsReselectionPending() const { - return reselection_pending.load(); + return is_reselection_pending.load(std::memory_order_acquire); } void Shutdown(); @@ -155,8 +135,10 @@ private: static constexpr u32 min_regular_priority = 2; std::array, NUM_CPU_CORES> scheduled_queue; std::array, NUM_CPU_CORES> suggested_queue; - std::atomic reselection_pending; + std::atomic is_reselection_pending; + // `preemption_priorities` are the priority levels at which the global scheduler + // preempts threads every 10 ms. They are ordered from Core 0 to Core 3 std::array preemption_priorities = {59, 59, 59, 62}; /// Lists all thread ids that aren't deleted/etc. @@ -166,7 +148,7 @@ private: class Scheduler final { public: - explicit Scheduler(Core::System& system, Core::ARM_Interface& cpu_core, const u32 core_id); + explicit Scheduler(Core::System& system, Core::ARM_Interface& cpu_core, u32 core_id); ~Scheduler(); /// Returns whether there are any threads that are ready to run. @@ -175,26 +157,27 @@ public: /// Reschedules to the next available thread (call after current thread is suspended) void TryDoContextSwitch(); + /// Unloads currently running thread void UnloadThread(); + /// Select the threads in top of the scheduling multilist. void SelectThreads(); /// Gets the current running thread Thread* GetCurrentThread() const; + /// Gets the currently selected thread from the top of the multilevel queue Thread* GetSelectedThread() const; /// Gets the timestamp for the last context switch in ticks. u64 GetLastContextSwitchTicks() const; bool ContextSwitchPending() const { - return context_switch_pending; + return is_context_switch_pending; } - void Shutdown() { - current_thread = nullptr; - selected_thread = nullptr; - } + /// Shutdowns the scheduler. 
+ void Shutdown(); private: friend class GlobalScheduler; @@ -226,7 +209,7 @@ private: u64 idle_selection_count = 0; const u32 core_id; - bool context_switch_pending = false; + bool is_context_switch_pending = false; }; } // namespace Kernel diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp index 151acf33a..f64236be1 100644 --- a/src/core/hle/kernel/svc.cpp +++ b/src/core/hle/kernel/svc.cpp @@ -1556,18 +1556,18 @@ static void SleepThread(Core::System& system, s64 nanoseconds) { auto& scheduler = system.CurrentScheduler(); auto* const current_thread = scheduler.GetCurrentThread(); - bool redundant = false; + bool is_redundant = false; if (nanoseconds <= 0) { switch (static_cast(nanoseconds)) { case SleepType::YieldWithoutLoadBalancing: - redundant = current_thread->YieldSimple(); + is_redundant = current_thread->YieldSimple(); break; case SleepType::YieldWithLoadBalancing: - redundant = current_thread->YieldAndBalanceLoad(); + is_redundant = current_thread->YieldAndBalanceLoad(); break; case SleepType::YieldAndWaitForLoadBalancing: - redundant = current_thread->YieldAndWaitForLoadBalancing(); + is_redundant = current_thread->YieldAndWaitForLoadBalancing(); break; default: UNREACHABLE_MSG("Unimplemented sleep yield type '{:016X}'!", nanoseconds); @@ -1576,9 +1576,9 @@ static void SleepThread(Core::System& system, s64 nanoseconds) { current_thread->Sleep(nanoseconds); } - if (redundant) { + if (is_redundant) { // If it's redundant, the core is pretty much idle. Some games keep idling - // a core while it's doing nothing, we advance timing to avoid costly continuos + // a core while it's doing nothing, we advance timing to avoid costly continuous // calls. system.CoreTiming().AddTicks(2000); } diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp index 7208bbb11..8663fe5ee 100644 --- a/src/core/hle/kernel/thread.cpp +++ b/src/core/hle/kernel/thread.cpp @@ -389,13 +389,13 @@ bool Thread::YieldAndWaitForLoadBalancing() { void Thread::SetSchedulingStatus(ThreadSchedStatus new_status) { const u32 old_flags = scheduling_state; - scheduling_state = - (scheduling_state & ThreadSchedMasks::HighMask) | static_cast(new_status); + scheduling_state = (scheduling_state & static_cast(ThreadSchedMasks::HighMask)) | + static_cast(new_status); AdjustSchedulingOnStatus(old_flags); } void Thread::SetCurrentPriority(u32 new_priority) { - u32 old_priority = std::exchange(current_priority, new_priority); + const u32 old_priority = std::exchange(current_priority, new_priority); AdjustSchedulingOnPriority(old_priority); } @@ -410,10 +410,9 @@ ResultCode Thread::SetCoreAndAffinityMask(s32 new_core, u64 new_affinity_mask) { }; const bool use_override = affinity_override_count != 0; - // The value -3 is "do not change the ideal core". - if (new_core == -3) { + if (new_core == static_cast(CoreFlags::DontChangeIdealCore)) { new_core = use_override ? 
ideal_core_override : ideal_core; - if ((new_affinity_mask & (1 << new_core)) == 0) { + if ((new_affinity_mask & (1ULL << new_core)) == 0) { return ERR_INVALID_COMBINATION; } } @@ -444,14 +443,14 @@ void Thread::AdjustSchedulingOnStatus(u32 old_flags) { } auto& scheduler = kernel.GlobalScheduler(); - if (static_cast(old_flags & ThreadSchedMasks::LowMask) == + if (static_cast(old_flags & static_cast(ThreadSchedMasks::LowMask)) == ThreadSchedStatus::Runnable) { // In this case the thread was running, now it's pausing/exitting if (processor_id >= 0) { scheduler.Unschedule(current_priority, processor_id, this); } - for (u32 core = 0; core < GlobalScheduler::NUM_CPU_CORES; core++) { + for (s32 core = 0; core < GlobalScheduler::NUM_CPU_CORES; core++) { if (core != processor_id && ((affinity_mask >> core) & 1) != 0) { scheduler.Unsuggest(current_priority, core, this); } @@ -462,7 +461,7 @@ void Thread::AdjustSchedulingOnStatus(u32 old_flags) { scheduler.Schedule(current_priority, processor_id, this); } - for (u32 core = 0; core < GlobalScheduler::NUM_CPU_CORES; core++) { + for (s32 core = 0; core < GlobalScheduler::NUM_CPU_CORES; core++) { if (core != processor_id && ((affinity_mask >> core) & 1) != 0) { scheduler.Suggest(current_priority, core, this); } diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h index ceb4d5159..e0f3b6204 100644 --- a/src/core/hle/kernel/thread.h +++ b/src/core/hle/kernel/thread.h @@ -82,19 +82,25 @@ enum class ThreadSchedStatus : u32 { Exited = 3, }; -enum ThreadSchedFlags : u32 { +enum class ThreadSchedFlags : u32 { ProcessPauseFlag = 1 << 4, ThreadPauseFlag = 1 << 5, ProcessDebugPauseFlag = 1 << 6, KernelInitPauseFlag = 1 << 8, }; -enum ThreadSchedMasks : u32 { +enum class ThreadSchedMasks : u32 { LowMask = 0x000f, HighMask = 0xfff0, ForcePauseMask = 0x0070, }; +enum class CoreFlags : s32 { + IgnoreIdealCore = -1, + ProcessIdealCore = -2, + DontChangeIdealCore = -3, +}; + class Thread final : public WaitObject { public: using MutexWaitingThreads = std::vector>; @@ -428,7 +434,8 @@ public: } ThreadSchedStatus GetSchedulingStatus() const { - return static_cast(scheduling_state & ThreadSchedMasks::LowMask); + return static_cast(scheduling_state & + static_cast(ThreadSchedMasks::LowMask)); } bool IsRunning() const { @@ -471,7 +478,8 @@ private: u64 total_cpu_time_ticks = 0; ///< Total CPU running ticks. u64 last_running_ticks = 0; ///< CPU tick when thread was last running - u64 yield_count = 0; ///< Number of innecessaries yields occured. + u64 yield_count = 0; ///< Number of redundant yields carried by this thread. + ///< a redundant yield is one where no scheduling is changed s32 processor_id = 0; From c32520ceb7cf2180fbbed11e9bd5f9df03409e1d Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sat, 12 Oct 2019 10:21:33 -0400 Subject: [PATCH 26/29] Kernel: Reverse global accessor removal. 
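A note on the scheduling_state word that the enum-class conversion in [PATCH 25/29] above touches: the low nibble holds the ThreadSchedStatus and the upper bits hold the force-pause flags, which is why SetSchedulingStatus masks with HighMask to preserve the flags while swapping the status, and GetSchedulingStatus masks with LowMask. A standalone sketch of that encoding, reusing the mask constants from thread.h (the None/Paused/Runnable values are assumed from the enum's shape; only Exited = 3 is visible in the hunk above):

    #include <cstdint>

    enum class SchedStatus : std::uint32_t { None = 0, Paused = 1, Runnable = 2, Exited = 3 };

    constexpr std::uint32_t kLowMask = 0x000f;  // ThreadSchedMasks::LowMask
    constexpr std::uint32_t kHighMask = 0xfff0; // ThreadSchedMasks::HighMask

    // Mirrors Thread::SetSchedulingStatus(): keep the flag bits, swap the status.
    constexpr std::uint32_t WithStatus(std::uint32_t state, SchedStatus status) {
        return (state & kHighMask) | static_cast<std::uint32_t>(status);
    }

    // Mirrors Thread::GetSchedulingStatus().
    constexpr SchedStatus StatusOf(std::uint32_t state) {
        return static_cast<SchedStatus>(state & kLowMask);
    }

    // Making a paused thread runnable keeps ThreadPauseFlag (1 << 5) intact:
    static_assert(StatusOf(WithStatus(1u << 5, SchedStatus::Runnable)) == SchedStatus::Runnable);
    static_assert((WithStatus(1u << 5, SchedStatus::Runnable) & kHighMask) == (1u << 5));

The enum-class change itself is behaviour-neutral: it only forces the explicit static_casts now visible in thread.cpp, so the masks can no longer mix with unrelated integers implicitly.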
--- src/core/hle/kernel/kernel.cpp | 8 -------- src/core/hle/kernel/kernel.h | 6 ------ src/core/hle/kernel/thread.cpp | 16 ++++++++-------- src/core/hle/kernel/wait_object.cpp | 2 +- 4 files changed, 9 insertions(+), 23 deletions(-) diff --git a/src/core/hle/kernel/kernel.cpp b/src/core/hle/kernel/kernel.cpp index 0d6286f84..f94ac150d 100644 --- a/src/core/hle/kernel/kernel.cpp +++ b/src/core/hle/kernel/kernel.cpp @@ -232,14 +232,6 @@ const Kernel::GlobalScheduler& KernelCore::GlobalScheduler() const { return impl->global_scheduler; } -Core::System& KernelCore::System() { - return impl->system; -} - -const Core::System& KernelCore::System() const { - return impl->system; -} - void KernelCore::AddNamedPort(std::string name, SharedPtr port) { impl->named_ports.emplace(std::move(name), std::move(port)); } diff --git a/src/core/hle/kernel/kernel.h b/src/core/hle/kernel/kernel.h index 9fb8f52ec..c4397fc77 100644 --- a/src/core/hle/kernel/kernel.h +++ b/src/core/hle/kernel/kernel.h @@ -82,12 +82,6 @@ public: /// Gets the sole instance of the global scheduler const Kernel::GlobalScheduler& GlobalScheduler() const; - /// Gets the sole instance of the system - Core::System& System(); - - /// Gets the sole instance of the system - const Core::System& System() const; - /// Adds a port to the named port table void AddNamedPort(std::string name, SharedPtr port); diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp index 8663fe5ee..0c11da1e0 100644 --- a/src/core/hle/kernel/thread.cpp +++ b/src/core/hle/kernel/thread.cpp @@ -41,7 +41,7 @@ Thread::~Thread() = default; void Thread::Stop() { // Cancel any outstanding wakeup events for this thread - kernel.System().CoreTiming().UnscheduleEvent(kernel.ThreadWakeupCallbackEventType(), + Core::System::GetInstance().CoreTiming().UnscheduleEvent(kernel.ThreadWakeupCallbackEventType(), callback_handle); kernel.ThreadWakeupCallbackHandleTable().Close(callback_handle); callback_handle = 0; @@ -68,12 +68,12 @@ void Thread::WakeAfterDelay(s64 nanoseconds) { // This function might be called from any thread so we have to be cautious and use the // thread-safe version of ScheduleEvent. 
const s64 cycles = Core::Timing::nsToCycles(std::chrono::nanoseconds{nanoseconds}); - kernel.System().CoreTiming().ScheduleEvent(cycles, kernel.ThreadWakeupCallbackEventType(), + Core::System::GetInstance().CoreTiming().ScheduleEvent(cycles, kernel.ThreadWakeupCallbackEventType(), callback_handle); } void Thread::CancelWakeupTimer() { - kernel.System().CoreTiming().UnscheduleEvent(kernel.ThreadWakeupCallbackEventType(), + Core::System::GetInstance().CoreTiming().UnscheduleEvent(kernel.ThreadWakeupCallbackEventType(), callback_handle); } @@ -176,7 +176,7 @@ ResultVal> Thread::Create(KernelCore& kernel, std::string name return ResultCode(-1); } - auto& system = kernel.System(); + auto& system = Core::System::GetInstance(); SharedPtr thread(new Thread(kernel)); thread->thread_id = kernel.CreateNewThreadID(); @@ -258,7 +258,7 @@ void Thread::SetStatus(ThreadStatus new_status) { } if (status == ThreadStatus::Running) { - last_running_ticks = kernel.System().CoreTiming().GetTicks(); + last_running_ticks = Core::System::GetInstance().CoreTiming().GetTicks(); } status = new_status; @@ -356,7 +356,7 @@ void Thread::SetActivity(ThreadActivity value) { // Set status if not waiting if (status == ThreadStatus::Ready || status == ThreadStatus::Running) { SetStatus(ThreadStatus::Paused); - kernel.System().CpuCore(processor_id).PrepareReschedule(); + Core::System::GetInstance().CpuCore(processor_id).PrepareReschedule(); } } else if (status == ThreadStatus::Paused) { // Ready to reschedule @@ -475,7 +475,7 @@ void Thread::AdjustSchedulingOnPriority(u32 old_priority) { if (GetSchedulingStatus() != ThreadSchedStatus::Runnable) { return; } - auto& scheduler = kernel.System().GlobalScheduler(); + auto& scheduler = Core::System::GetInstance().GlobalScheduler(); if (processor_id >= 0) { scheduler.Unschedule(old_priority, processor_id, this); } @@ -507,7 +507,7 @@ void Thread::AdjustSchedulingOnPriority(u32 old_priority) { } void Thread::AdjustSchedulingOnAffinity(u64 old_affinity_mask, s32 old_core) { - auto& scheduler = kernel.System().GlobalScheduler(); + auto& scheduler = Core::System::GetInstance().GlobalScheduler(); if (GetSchedulingStatus() != ThreadSchedStatus::Runnable || current_priority >= THREADPRIO_COUNT) { return; diff --git a/src/core/hle/kernel/wait_object.cpp b/src/core/hle/kernel/wait_object.cpp index 0f833fb3a..c00cef062 100644 --- a/src/core/hle/kernel/wait_object.cpp +++ b/src/core/hle/kernel/wait_object.cpp @@ -95,7 +95,7 @@ void WaitObject::WakeupWaitingThread(SharedPtr thread) { } if (resume) { thread->ResumeFromWait(); - kernel.System().PrepareReschedule(thread->GetProcessorID()); + Core::System::GetInstance().PrepareReschedule(thread->GetProcessorID()); } } From a3524879be351f3726a622217d5c2d928ae92b42 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sat, 12 Oct 2019 10:28:44 -0400 Subject: [PATCH 27/29] Kernel: Clang Format --- src/core/hle/kernel/scheduler.cpp | 2 +- src/core/hle/kernel/thread.cpp | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/core/hle/kernel/scheduler.cpp b/src/core/hle/kernel/scheduler.cpp index dabeb05d6..e6dcb9639 100644 --- a/src/core/hle/kernel/scheduler.cpp +++ b/src/core/hle/kernel/scheduler.cpp @@ -414,7 +414,7 @@ u64 Scheduler::GetLastContextSwitchTicks() const { } void Scheduler::TryDoContextSwitch() { - if (is_context_switch_pending ) { + if (is_context_switch_pending) { SwitchContext(); } } diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp index 0c11da1e0..3408658e5 100644 --- 
a/src/core/hle/kernel/thread.cpp +++ b/src/core/hle/kernel/thread.cpp @@ -42,7 +42,7 @@ Thread::~Thread() = default; void Thread::Stop() { // Cancel any outstanding wakeup events for this thread Core::System::GetInstance().CoreTiming().UnscheduleEvent(kernel.ThreadWakeupCallbackEventType(), - callback_handle); + callback_handle); kernel.ThreadWakeupCallbackHandleTable().Close(callback_handle); callback_handle = 0; SetStatus(ThreadStatus::Dead); @@ -68,13 +68,13 @@ void Thread::WakeAfterDelay(s64 nanoseconds) { // This function might be called from any thread so we have to be cautious and use the // thread-safe version of ScheduleEvent. const s64 cycles = Core::Timing::nsToCycles(std::chrono::nanoseconds{nanoseconds}); - Core::System::GetInstance().CoreTiming().ScheduleEvent(cycles, kernel.ThreadWakeupCallbackEventType(), - callback_handle); + Core::System::GetInstance().CoreTiming().ScheduleEvent( + cycles, kernel.ThreadWakeupCallbackEventType(), callback_handle); } void Thread::CancelWakeupTimer() { Core::System::GetInstance().CoreTiming().UnscheduleEvent(kernel.ThreadWakeupCallbackEventType(), - callback_handle); + callback_handle); } static std::optional GetNextProcessorId(u64 mask) { From e28c7f521765a85e27259539f0873b15c18a98f8 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sat, 12 Oct 2019 10:38:55 -0400 Subject: [PATCH 28/29] Kernel: Address Feedback 2 --- src/core/hle/kernel/thread.cpp | 6 +++--- src/core/hle/kernel/thread.h | 9 +++------ 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp index 3408658e5..aeb20b24b 100644 --- a/src/core/hle/kernel/thread.cpp +++ b/src/core/hle/kernel/thread.cpp @@ -410,7 +410,7 @@ ResultCode Thread::SetCoreAndAffinityMask(s32 new_core, u64 new_affinity_mask) { }; const bool use_override = affinity_override_count != 0; - if (new_core == static_cast(CoreFlags::DontChangeIdealCore)) { + if (new_core == THREADDONTCHANGE_IDEAL) { new_core = use_override ? ideal_core_override : ideal_core; if ((new_affinity_mask & (1ULL << new_core)) == 0) { return ERR_INVALID_COMBINATION; @@ -452,7 +452,7 @@ void Thread::AdjustSchedulingOnStatus(u32 old_flags) { for (s32 core = 0; core < GlobalScheduler::NUM_CPU_CORES; core++) { if (core != processor_id && ((affinity_mask >> core) & 1) != 0) { - scheduler.Unsuggest(current_priority, core, this); + scheduler.Unsuggest(current_priority, static_cast(core), this); } } } else if (GetSchedulingStatus() == ThreadSchedStatus::Runnable) { @@ -463,7 +463,7 @@ void Thread::AdjustSchedulingOnStatus(u32 old_flags) { for (s32 core = 0; core < GlobalScheduler::NUM_CPU_CORES; core++) { if (core != processor_id && ((affinity_mask >> core) & 1) != 0) { - scheduler.Suggest(current_priority, core, this); + scheduler.Suggest(current_priority, static_cast(core), this); } } } diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h index e0f3b6204..7ee437e17 100644 --- a/src/core/hle/kernel/thread.h +++ b/src/core/hle/kernel/thread.h @@ -35,6 +35,9 @@ enum ThreadProcessorId : s32 { /// Run thread on the ideal core specified by the process. THREADPROCESSORID_IDEAL = -2, + /// when setting Core and Affiny, keeps the ideal core intact + THREADDONTCHANGE_IDEAL = -3, + /// Indicates that the preferred processor ID shouldn't be updated in /// a core mask setting operation. 
THREADPROCESSORID_DONT_UPDATE = -3, @@ -95,12 +98,6 @@ enum class ThreadSchedMasks : u32 { ForcePauseMask = 0x0070, }; -enum class CoreFlags : s32 { - IgnoreIdealCore = -1, - ProcessIdealCore = -2, - DontChangeIdealCore = -3, -}; - class Thread final : public WaitObject { public: using MutexWaitingThreads = std::vector>; From 64e652d8cbcc4cc67442879ab7e379d62b72703c Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sat, 12 Oct 2019 10:55:34 -0400 Subject: [PATCH 29/29] Kernel Thread: Cleanup THREADPROCESSORID_DONT_UPDATE. --- src/core/hle/kernel/thread.cpp | 2 +- src/core/hle/kernel/thread.h | 3 --- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp index aeb20b24b..962530d2d 100644 --- a/src/core/hle/kernel/thread.cpp +++ b/src/core/hle/kernel/thread.cpp @@ -410,7 +410,7 @@ ResultCode Thread::SetCoreAndAffinityMask(s32 new_core, u64 new_affinity_mask) { }; const bool use_override = affinity_override_count != 0; - if (new_core == THREADDONTCHANGE_IDEAL) { + if (new_core == THREADPROCESSORID_DONT_UPDATE) { new_core = use_override ? ideal_core_override : ideal_core; if ((new_affinity_mask & (1ULL << new_core)) == 0) { return ERR_INVALID_COMBINATION; diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h index 7ee437e17..c9870873d 100644 --- a/src/core/hle/kernel/thread.h +++ b/src/core/hle/kernel/thread.h @@ -35,9 +35,6 @@ enum ThreadProcessorId : s32 { /// Run thread on the ideal core specified by the process. THREADPROCESSORID_IDEAL = -2, - /// when setting Core and Affiny, keeps the ideal core intact - THREADDONTCHANGE_IDEAL = -3, - /// Indicates that the preferred processor ID shouldn't be updated in /// a core mask setting operation. THREADPROCESSORID_DONT_UPDATE = -3,
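The last two patches above settle a round of naming churn: [PATCH 25/29] introduced CoreFlags::DontChangeIdealCore for the magic value -3, [PATCH 28/29] swapped it for a new THREADDONTCHANGE_IDEAL enumerator, and [PATCH 29/29] lands on the pre-existing THREADPROCESSORID_DONT_UPDATE, which already carried the same value and documentation. The guard that consumes it in Thread::SetCoreAndAffinityMask is worth spelling out: when the caller asks to keep the ideal core, the (possibly overridden) ideal core must still be permitted by the new affinity mask. A minimal sketch of that validation, ignoring the affinity_override_count bookkeeping, with std::nullopt standing in for ERR_INVALID_COMBINATION:

    #include <cstdint>
    #include <optional>

    constexpr std::int32_t kDontUpdateIdealCore = -3; // THREADPROCESSORID_DONT_UPDATE

    // Returns the core to treat as ideal, or std::nullopt where the kernel
    // returns ERR_INVALID_COMBINATION.
    std::optional<std::int32_t> ResolveIdealCore(std::int32_t requested_core,
                                                 std::uint64_t new_affinity_mask,
                                                 std::int32_t current_ideal_core) {
        std::int32_t core = requested_core;
        if (core == kDontUpdateIdealCore) {
            core = current_ideal_core;
            // Keeping the old ideal core is only valid if the new mask still
            // contains it; the 64-bit constant matches the 1ULL shift fix above.
            if ((new_affinity_mask & (std::uint64_t{1} << core)) == 0) {
                return std::nullopt;
            }
        }
        return core;
    }

Note that [PATCH 25/29] also widened this very shift from 1 << new_core to 1ULL << new_core, so the test is performed against the full 64-bit affinity mask rather than a truncated int.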