Merge pull request #7454 from FernandoS27/new-core-timing
Core: Remake Core Timing
This commit is contained in:
commit
603952bc27
|
@ -47,6 +47,9 @@ void SetCurrentThreadPriority(ThreadPriority new_priority) {
|
|||
case ThreadPriority::VeryHigh:
|
||||
windows_priority = THREAD_PRIORITY_HIGHEST;
|
||||
break;
|
||||
case ThreadPriority::Critical:
|
||||
windows_priority = THREAD_PRIORITY_TIME_CRITICAL;
|
||||
break;
|
||||
default:
|
||||
windows_priority = THREAD_PRIORITY_NORMAL;
|
||||
break;
|
||||
|
@ -59,9 +62,10 @@ void SetCurrentThreadPriority(ThreadPriority new_priority) {
|
|||
void SetCurrentThreadPriority(ThreadPriority new_priority) {
|
||||
pthread_t this_thread = pthread_self();
|
||||
|
||||
s32 max_prio = sched_get_priority_max(SCHED_OTHER);
|
||||
s32 min_prio = sched_get_priority_min(SCHED_OTHER);
|
||||
u32 level = static_cast<u32>(new_priority) + 1;
|
||||
const auto scheduling_type = SCHED_OTHER;
|
||||
s32 max_prio = sched_get_priority_max(scheduling_type);
|
||||
s32 min_prio = sched_get_priority_min(scheduling_type);
|
||||
u32 level = std::max(static_cast<u32>(new_priority) + 1, 4U);
|
||||
|
||||
struct sched_param params;
|
||||
if (max_prio > min_prio) {
|
||||
|
@ -70,7 +74,7 @@ void SetCurrentThreadPriority(ThreadPriority new_priority) {
|
|||
params.sched_priority = min_prio - ((min_prio - max_prio) * level) / 4;
|
||||
}
|
||||
|
||||
pthread_setschedparam(this_thread, SCHED_OTHER, ¶ms);
|
||||
pthread_setschedparam(this_thread, scheduling_type, ¶ms);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
|
@ -92,6 +92,7 @@ enum class ThreadPriority : u32 {
|
|||
Normal = 1,
|
||||
High = 2,
|
||||
VeryHigh = 3,
|
||||
Critical = 4,
|
||||
};
|
||||
|
||||
void SetCurrentThreadPriority(ThreadPriority new_priority);
|
||||
|
|
|
@ -30,6 +30,10 @@ namespace Common {
|
|||
#else
|
||||
return _udiv128(r[1], r[0], d, &remainder);
|
||||
#endif
|
||||
#else
|
||||
#ifdef __SIZEOF_INT128__
|
||||
const auto product = static_cast<unsigned __int128>(a) * static_cast<unsigned __int128>(b);
|
||||
return static_cast<u64>(product / d);
|
||||
#else
|
||||
const u64 diva = a / d;
|
||||
const u64 moda = a % d;
|
||||
|
@ -37,6 +41,7 @@ namespace Common {
|
|||
const u64 modb = b % d;
|
||||
return diva * b + moda * divb + moda * modb / d;
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
|
||||
// This function multiplies two u64 values and produces a u128 value.
|
||||
|
|
|
@ -75,8 +75,8 @@ NativeClock::NativeClock(u64 emulated_cpu_frequency_, u64 emulated_clock_frequen
|
|||
}
|
||||
|
||||
u64 NativeClock::GetRTSC() {
|
||||
TimePoint new_time_point{};
|
||||
TimePoint current_time_point{};
|
||||
TimePoint new_time_point{};
|
||||
|
||||
current_time_point.pack = Common::AtomicLoad128(time_point.pack.data());
|
||||
do {
|
||||
|
@ -89,8 +89,7 @@ u64 NativeClock::GetRTSC() {
|
|||
new_time_point.inner.accumulated_ticks = current_time_point.inner.accumulated_ticks + diff;
|
||||
} while (!Common::AtomicCompareAndSwap(time_point.pack.data(), new_time_point.pack,
|
||||
current_time_point.pack, current_time_point.pack));
|
||||
/// The clock cannot be more precise than the guest timer, remove the lower bits
|
||||
return new_time_point.inner.accumulated_ticks & inaccuracy_mask;
|
||||
return new_time_point.inner.accumulated_ticks;
|
||||
}
|
||||
|
||||
void NativeClock::Pause(bool is_paused) {
|
||||
|
|
|
@ -37,12 +37,8 @@ private:
|
|||
} inner;
|
||||
};
|
||||
|
||||
/// value used to reduce the native clock's accuracy as some apps rely on
|
||||
/// undefined behavior where the level of accuracy in the clock shouldn't
|
||||
/// be higher.
|
||||
static constexpr u64 inaccuracy_mask = ~(UINT64_C(0x400) - 1);
|
||||
|
||||
TimePoint time_point;
|
||||
|
||||
// factors
|
||||
u64 clock_rtsc_factor{};
|
||||
u64 cpu_rtsc_factor{};
|
||||
|
|
|
@ -6,7 +6,9 @@
|
|||
#include <string>
|
||||
#include <tuple>
|
||||
|
||||
#include "common/logging/log.h"
|
||||
#include "common/microprofile.h"
|
||||
#include "common/thread.h"
|
||||
#include "core/core_timing.h"
|
||||
#include "core/core_timing_util.h"
|
||||
#include "core/hardware_properties.h"
|
||||
|
@ -41,11 +43,11 @@ CoreTiming::CoreTiming()
|
|||
|
||||
CoreTiming::~CoreTiming() = default;
|
||||
|
||||
void CoreTiming::ThreadEntry(CoreTiming& instance) {
|
||||
constexpr char name[] = "yuzu:HostTiming";
|
||||
MicroProfileOnThreadCreate(name);
|
||||
Common::SetCurrentThreadName(name);
|
||||
Common::SetCurrentThreadPriority(Common::ThreadPriority::VeryHigh);
|
||||
void CoreTiming::ThreadEntry(CoreTiming& instance, size_t id) {
|
||||
const std::string name = "yuzu:HostTiming_" + std::to_string(id);
|
||||
MicroProfileOnThreadCreate(name.c_str());
|
||||
Common::SetCurrentThreadName(name.c_str());
|
||||
Common::SetCurrentThreadPriority(Common::ThreadPriority::Critical);
|
||||
instance.on_thread_init();
|
||||
instance.ThreadLoop();
|
||||
MicroProfileOnThreadExit();
|
||||
|
@ -59,68 +61,97 @@ void CoreTiming::Initialize(std::function<void()>&& on_thread_init_) {
|
|||
const auto empty_timed_callback = [](std::uintptr_t, std::chrono::nanoseconds) {};
|
||||
ev_lost = CreateEvent("_lost_event", empty_timed_callback);
|
||||
if (is_multicore) {
|
||||
timer_thread = std::make_unique<std::thread>(ThreadEntry, std::ref(*this));
|
||||
const auto hardware_concurrency = std::thread::hardware_concurrency();
|
||||
size_t id = 0;
|
||||
worker_threads.emplace_back(ThreadEntry, std::ref(*this), id++);
|
||||
if (hardware_concurrency > 8) {
|
||||
worker_threads.emplace_back(ThreadEntry, std::ref(*this), id++);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void CoreTiming::Shutdown() {
|
||||
paused = true;
|
||||
is_paused = true;
|
||||
shutting_down = true;
|
||||
pause_event.Set();
|
||||
event.Set();
|
||||
if (timer_thread) {
|
||||
timer_thread->join();
|
||||
std::atomic_thread_fence(std::memory_order_release);
|
||||
|
||||
event_cv.notify_all();
|
||||
wait_pause_cv.notify_all();
|
||||
for (auto& thread : worker_threads) {
|
||||
thread.join();
|
||||
}
|
||||
worker_threads.clear();
|
||||
ClearPendingEvents();
|
||||
timer_thread.reset();
|
||||
has_started = false;
|
||||
}
|
||||
|
||||
void CoreTiming::Pause(bool is_paused) {
|
||||
paused = is_paused;
|
||||
pause_event.Set();
|
||||
}
|
||||
|
||||
void CoreTiming::SyncPause(bool is_paused) {
|
||||
if (is_paused == paused && paused_set == paused) {
|
||||
void CoreTiming::Pause(bool is_paused_) {
|
||||
std::unique_lock main_lock(event_mutex);
|
||||
if (is_paused_ == paused_state.load(std::memory_order_relaxed)) {
|
||||
return;
|
||||
}
|
||||
Pause(is_paused);
|
||||
if (timer_thread) {
|
||||
if (!is_paused) {
|
||||
pause_event.Set();
|
||||
if (is_multicore) {
|
||||
is_paused = is_paused_;
|
||||
event_cv.notify_all();
|
||||
if (!is_paused_) {
|
||||
wait_pause_cv.notify_all();
|
||||
}
|
||||
}
|
||||
paused_state.store(is_paused_, std::memory_order_relaxed);
|
||||
}
|
||||
|
||||
void CoreTiming::SyncPause(bool is_paused_) {
|
||||
std::unique_lock main_lock(event_mutex);
|
||||
if (is_paused_ == paused_state.load(std::memory_order_relaxed)) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (is_multicore) {
|
||||
is_paused = is_paused_;
|
||||
event_cv.notify_all();
|
||||
if (!is_paused_) {
|
||||
wait_pause_cv.notify_all();
|
||||
}
|
||||
}
|
||||
paused_state.store(is_paused_, std::memory_order_relaxed);
|
||||
if (is_multicore) {
|
||||
if (is_paused_) {
|
||||
wait_signal_cv.wait(main_lock, [this] { return pause_count == worker_threads.size(); });
|
||||
} else {
|
||||
wait_signal_cv.wait(main_lock, [this] { return pause_count == 0; });
|
||||
}
|
||||
event.Set();
|
||||
while (paused_set != is_paused)
|
||||
;
|
||||
}
|
||||
}
|
||||
|
||||
bool CoreTiming::IsRunning() const {
|
||||
return !paused_set;
|
||||
return !paused_state.load(std::memory_order_acquire);
|
||||
}
|
||||
|
||||
bool CoreTiming::HasPendingEvents() const {
|
||||
return !(wait_set && event_queue.empty());
|
||||
std::unique_lock main_lock(event_mutex);
|
||||
return !event_queue.empty() || pending_events.load(std::memory_order_relaxed) != 0;
|
||||
}
|
||||
|
||||
void CoreTiming::ScheduleEvent(std::chrono::nanoseconds ns_into_future,
|
||||
const std::shared_ptr<EventType>& event_type,
|
||||
std::uintptr_t user_data) {
|
||||
{
|
||||
std::scoped_lock scope{basic_lock};
|
||||
|
||||
std::unique_lock main_lock(event_mutex);
|
||||
const u64 timeout = static_cast<u64>((GetGlobalTimeNs() + ns_into_future).count());
|
||||
|
||||
event_queue.emplace_back(Event{timeout, event_fifo_id++, user_data, event_type});
|
||||
pending_events.fetch_add(1, std::memory_order_relaxed);
|
||||
|
||||
std::push_heap(event_queue.begin(), event_queue.end(), std::greater<>());
|
||||
|
||||
if (is_multicore) {
|
||||
event_cv.notify_one();
|
||||
}
|
||||
event.Set();
|
||||
}
|
||||
|
||||
void CoreTiming::UnscheduleEvent(const std::shared_ptr<EventType>& event_type,
|
||||
std::uintptr_t user_data) {
|
||||
std::scoped_lock scope{basic_lock};
|
||||
std::unique_lock main_lock(event_mutex);
|
||||
const auto itr = std::remove_if(event_queue.begin(), event_queue.end(), [&](const Event& e) {
|
||||
return e.type.lock().get() == event_type.get() && e.user_data == user_data;
|
||||
});
|
||||
|
@ -129,6 +160,7 @@ void CoreTiming::UnscheduleEvent(const std::shared_ptr<EventType>& event_type,
|
|||
if (itr != event_queue.end()) {
|
||||
event_queue.erase(itr, event_queue.end());
|
||||
std::make_heap(event_queue.begin(), event_queue.end(), std::greater<>());
|
||||
pending_events.fetch_sub(1, std::memory_order_relaxed);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -168,11 +200,12 @@ u64 CoreTiming::GetClockTicks() const {
|
|||
}
|
||||
|
||||
void CoreTiming::ClearPendingEvents() {
|
||||
std::unique_lock main_lock(event_mutex);
|
||||
event_queue.clear();
|
||||
}
|
||||
|
||||
void CoreTiming::RemoveEvent(const std::shared_ptr<EventType>& event_type) {
|
||||
std::scoped_lock lock{basic_lock};
|
||||
std::unique_lock main_lock(event_mutex);
|
||||
|
||||
const auto itr = std::remove_if(event_queue.begin(), event_queue.end(), [&](const Event& e) {
|
||||
return e.type.lock().get() == event_type.get();
|
||||
|
@ -186,21 +219,28 @@ void CoreTiming::RemoveEvent(const std::shared_ptr<EventType>& event_type) {
|
|||
}
|
||||
|
||||
std::optional<s64> CoreTiming::Advance() {
|
||||
std::scoped_lock lock{advance_lock, basic_lock};
|
||||
global_timer = GetGlobalTimeNs().count();
|
||||
|
||||
std::unique_lock main_lock(event_mutex);
|
||||
while (!event_queue.empty() && event_queue.front().time <= global_timer) {
|
||||
Event evt = std::move(event_queue.front());
|
||||
std::pop_heap(event_queue.begin(), event_queue.end(), std::greater<>());
|
||||
event_queue.pop_back();
|
||||
basic_lock.unlock();
|
||||
|
||||
if (const auto event_type{evt.type.lock()}) {
|
||||
event_type->callback(
|
||||
evt.user_data, std::chrono::nanoseconds{static_cast<s64>(global_timer - evt.time)});
|
||||
sequence_mutex.lock();
|
||||
event_mutex.unlock();
|
||||
|
||||
event_type->guard.lock();
|
||||
sequence_mutex.unlock();
|
||||
const s64 delay = static_cast<s64>(GetGlobalTimeNs().count() - evt.time);
|
||||
event_type->callback(evt.user_data, std::chrono::nanoseconds{delay});
|
||||
event_type->guard.unlock();
|
||||
|
||||
event_mutex.lock();
|
||||
pending_events.fetch_sub(1, std::memory_order_relaxed);
|
||||
}
|
||||
|
||||
basic_lock.lock();
|
||||
global_timer = GetGlobalTimeNs().count();
|
||||
}
|
||||
|
||||
|
@ -213,26 +253,34 @@ std::optional<s64> CoreTiming::Advance() {
|
|||
}
|
||||
|
||||
void CoreTiming::ThreadLoop() {
|
||||
const auto predicate = [this] { return !event_queue.empty() || is_paused; };
|
||||
has_started = true;
|
||||
while (!shutting_down) {
|
||||
while (!paused) {
|
||||
paused_set = false;
|
||||
while (!is_paused && !shutting_down) {
|
||||
const auto next_time = Advance();
|
||||
if (next_time) {
|
||||
if (*next_time > 0) {
|
||||
std::chrono::nanoseconds next_time_ns = std::chrono::nanoseconds(*next_time);
|
||||
event.WaitFor(next_time_ns);
|
||||
std::unique_lock main_lock(event_mutex);
|
||||
event_cv.wait_for(main_lock, next_time_ns, predicate);
|
||||
}
|
||||
} else {
|
||||
wait_set = true;
|
||||
event.Wait();
|
||||
std::unique_lock main_lock(event_mutex);
|
||||
event_cv.wait(main_lock, predicate);
|
||||
}
|
||||
wait_set = false;
|
||||
}
|
||||
paused_set = true;
|
||||
std::unique_lock main_lock(event_mutex);
|
||||
pause_count++;
|
||||
if (pause_count == worker_threads.size()) {
|
||||
clock->Pause(true);
|
||||
pause_event.Wait();
|
||||
wait_signal_cv.notify_all();
|
||||
}
|
||||
wait_pause_cv.wait(main_lock, [this] { return !is_paused || shutting_down; });
|
||||
pause_count--;
|
||||
if (pause_count == 0) {
|
||||
clock->Pause(false);
|
||||
wait_signal_cv.notify_all();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -5,6 +5,7 @@
|
|||
|
||||
#include <atomic>
|
||||
#include <chrono>
|
||||
#include <condition_variable>
|
||||
#include <functional>
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
|
@ -14,7 +15,6 @@
|
|||
#include <vector>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "common/thread.h"
|
||||
#include "common/wall_clock.h"
|
||||
|
||||
namespace Core::Timing {
|
||||
|
@ -32,6 +32,7 @@ struct EventType {
|
|||
TimedCallback callback;
|
||||
/// The name of the event.
|
||||
const std::string name;
|
||||
mutable std::mutex guard;
|
||||
};
|
||||
|
||||
/**
|
||||
|
@ -131,7 +132,7 @@ private:
|
|||
/// Clear all pending events. This should ONLY be done on exit.
|
||||
void ClearPendingEvents();
|
||||
|
||||
static void ThreadEntry(CoreTiming& instance);
|
||||
static void ThreadEntry(CoreTiming& instance, size_t id);
|
||||
void ThreadLoop();
|
||||
|
||||
std::unique_ptr<Common::WallClock> clock;
|
||||
|
@ -144,21 +145,25 @@ private:
|
|||
// accommodated by the standard adaptor class.
|
||||
std::vector<Event> event_queue;
|
||||
u64 event_fifo_id = 0;
|
||||
std::atomic<size_t> pending_events{};
|
||||
|
||||
std::shared_ptr<EventType> ev_lost;
|
||||
Common::Event event{};
|
||||
Common::Event pause_event{};
|
||||
std::mutex basic_lock;
|
||||
std::mutex advance_lock;
|
||||
std::unique_ptr<std::thread> timer_thread;
|
||||
std::atomic<bool> paused{};
|
||||
std::atomic<bool> paused_set{};
|
||||
std::atomic<bool> wait_set{};
|
||||
std::atomic<bool> shutting_down{};
|
||||
std::atomic<bool> has_started{};
|
||||
std::function<void()> on_thread_init{};
|
||||
|
||||
std::vector<std::thread> worker_threads;
|
||||
|
||||
std::condition_variable event_cv;
|
||||
std::condition_variable wait_pause_cv;
|
||||
std::condition_variable wait_signal_cv;
|
||||
mutable std::mutex event_mutex;
|
||||
mutable std::mutex sequence_mutex;
|
||||
|
||||
std::atomic<bool> paused_state{};
|
||||
bool is_paused{};
|
||||
bool shutting_down{};
|
||||
bool is_multicore{};
|
||||
size_t pause_count{};
|
||||
|
||||
/// Cycle timing
|
||||
u64 ticks{};
|
||||
|
|
|
@ -8,6 +8,7 @@
|
|||
#include <chrono>
|
||||
#include <cstdlib>
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
#include <string>
|
||||
|
||||
#include "core/core.h"
|
||||
|
@ -21,13 +22,14 @@ std::array<s64, 5> delays{};
|
|||
|
||||
std::bitset<CB_IDS.size()> callbacks_ran_flags;
|
||||
u64 expected_callback = 0;
|
||||
std::mutex control_mutex;
|
||||
|
||||
template <unsigned int IDX>
|
||||
void HostCallbackTemplate(std::uintptr_t user_data, std::chrono::nanoseconds ns_late) {
|
||||
std::unique_lock<std::mutex> lk(control_mutex);
|
||||
static_assert(IDX < CB_IDS.size(), "IDX out of range");
|
||||
callbacks_ran_flags.set(IDX);
|
||||
REQUIRE(CB_IDS[IDX] == user_data);
|
||||
REQUIRE(CB_IDS[IDX] == CB_IDS[calls_order[expected_callback]]);
|
||||
delays[IDX] = ns_late.count();
|
||||
++expected_callback;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue