Merge pull request #10935 from Morph1984/mwaitx

x64: Make use of monitorx instructions for power efficient sleeps (AMD)
This commit is contained in:
liamwhite 2023-06-29 10:01:26 -04:00 committed by GitHub
commit 5e70db0d43
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 41 additions and 14 deletions

View File

@@ -93,6 +93,7 @@ void AppendCPUInfo(FieldCollection& fc) {
add_field("CPU_Extension_x64_GFNI", caps.gfni); add_field("CPU_Extension_x64_GFNI", caps.gfni);
add_field("CPU_Extension_x64_INVARIANT_TSC", caps.invariant_tsc); add_field("CPU_Extension_x64_INVARIANT_TSC", caps.invariant_tsc);
add_field("CPU_Extension_x64_LZCNT", caps.lzcnt); add_field("CPU_Extension_x64_LZCNT", caps.lzcnt);
add_field("CPU_Extension_x64_MONITORX", caps.monitorx);
add_field("CPU_Extension_x64_MOVBE", caps.movbe); add_field("CPU_Extension_x64_MOVBE", caps.movbe);
add_field("CPU_Extension_x64_PCLMULQDQ", caps.pclmulqdq); add_field("CPU_Extension_x64_PCLMULQDQ", caps.pclmulqdq);
add_field("CPU_Extension_x64_POPCNT", caps.popcnt); add_field("CPU_Extension_x64_POPCNT", caps.popcnt);

View File

@@ -168,6 +168,7 @@ static CPUCaps Detect() {
__cpuid(cpu_id, 0x80000001); __cpuid(cpu_id, 0x80000001);
caps.lzcnt = Common::Bit<5>(cpu_id[2]); caps.lzcnt = Common::Bit<5>(cpu_id[2]);
caps.fma4 = Common::Bit<16>(cpu_id[2]); caps.fma4 = Common::Bit<16>(cpu_id[2]);
caps.monitorx = Common::Bit<29>(cpu_id[2]);
} }
if (max_ex_fn >= 0x80000007) { if (max_ex_fn >= 0x80000007) {

View File

@@ -63,6 +63,7 @@ struct CPUCaps {
bool gfni : 1; bool gfni : 1;
bool invariant_tsc : 1; bool invariant_tsc : 1;
bool lzcnt : 1; bool lzcnt : 1;
bool monitorx : 1;
bool movbe : 1; bool movbe : 1;
bool pclmulqdq : 1; bool pclmulqdq : 1;
bool popcnt : 1; bool popcnt : 1;

View File

@@ -13,36 +13,60 @@
namespace Common::X64 { namespace Common::X64 {
#ifdef _MSC_VER namespace {
__forceinline static void TPAUSE() {
// 100,000 cycles is a reasonable amount of time to wait to save on CPU resources. // 100,000 cycles is a reasonable amount of time to wait to save on CPU resources.
// For reference: // For reference:
// At 1 GHz, 100K cycles is 100us // At 1 GHz, 100K cycles is 100us
// At 2 GHz, 100K cycles is 50us // At 2 GHz, 100K cycles is 50us
// At 4 GHz, 100K cycles is 25us // At 4 GHz, 100K cycles is 25us
static constexpr auto PauseCycles = 100'000; constexpr auto PauseCycles = 100'000U;
_tpause(0, FencedRDTSC() + PauseCycles);
} // Anonymous namespace
#ifdef _MSC_VER
// MSVC variant: issues a timed pause through the _tpause intrinsic.
// The second argument is the value returned by FencedRDTSC() plus PauseCycles,
// i.e. a deadline PauseCycles counts past the current reading.
__forceinline static void TPAUSE() {
// Control value passed as the first _tpause argument; going by its name,
// 0 requests the C0.2 state (NOTE(review): confirm against the Intel WAITPKG documentation).
static constexpr auto RequestC02State = 0U;
_tpause(RequestC02State, FencedRDTSC() + PauseCycles);
}
// MSVC variant: arms a monitor on a local variable with _mm_monitorx and then
// waits with _mm_mwaitx, passing PauseCycles as the third argument.
// NOTE(review): the third argument is presumably the timeout in cycles that
// EnableWaitTimeFlag switches on; confirm against the AMD MONITORX/MWAITX documentation.
__forceinline static void MWAITX() {
// Bit 1 of the first _mm_mwaitx argument.
static constexpr auto EnableWaitTimeFlag = 1U << 1;
// Second _mm_mwaitx argument; going by its name, 0 requests the C1 state.
static constexpr auto RequestC1State = 0U;
// monitor_var should be aligned to a cache line.
// It is a function-local variable that this function never writes after initialization.
alignas(64) u64 monitor_var{};
// The monitor must be set up before the wait is issued; keep this order.
_mm_monitorx(&monitor_var, 0, 0);
_mm_mwaitx(EnableWaitTimeFlag, RequestC1State, PauseCycles);
} }
#else #else
static void TPAUSE() { static void TPAUSE() {
// 100,000 cycles is a reasonable amount of time to wait to save on CPU resources. static constexpr auto RequestC02State = 0U;
// For reference:
// At 1 GHz, 100K cycles is 100us
// At 2 GHz, 100K cycles is 50us
// At 4 GHz, 100K cycles is 25us
static constexpr auto PauseCycles = 100'000;
const auto tsc = FencedRDTSC() + PauseCycles; const auto tsc = FencedRDTSC() + PauseCycles;
const auto eax = static_cast<u32>(tsc & 0xFFFFFFFF); const auto eax = static_cast<u32>(tsc & 0xFFFFFFFF);
const auto edx = static_cast<u32>(tsc >> 32); const auto edx = static_cast<u32>(tsc >> 32);
asm volatile("tpause %0" : : "r"(0), "d"(edx), "a"(eax)); asm volatile("tpause %0" : : "r"(RequestC02State), "d"(edx), "a"(eax));
}
// Non-MSVC variant: same monitor-then-wait sequence, written as inline assembly.
// NOTE(review): PauseCycles is presumably the timeout in cycles that
// EnableWaitTimeFlag switches on; confirm against the AMD MONITORX/MWAITX documentation.
static void MWAITX() {
// Bit 1 of the value placed in the "c" register for mwaitx.
static constexpr auto EnableWaitTimeFlag = 1U << 1;
// Value placed in the "a" register for mwaitx; going by its name, 0 requests the C1 state.
static constexpr auto RequestC1State = 0U;
// monitor_var should be aligned to a cache line.
// It is a function-local variable that this function never writes after initialization.
alignas(64) u64 monitor_var{};
// monitorx operands: "a" = address of monitor_var, "c" = 0, "d" = 0.
asm volatile("monitorx" : : "a"(&monitor_var), "c"(0), "d"(0));
// mwaitx operands: "a" = RequestC1State, "b" = PauseCycles, "c" = EnableWaitTimeFlag.
asm volatile("mwaitx" : : "a"(RequestC1State), "b"(PauseCycles), "c"(EnableWaitTimeFlag));
} }
#endif #endif
void MicroSleep() { void MicroSleep() {
static const bool has_waitpkg = GetCPUCaps().waitpkg; static const bool has_waitpkg = GetCPUCaps().waitpkg;
static const bool has_monitorx = GetCPUCaps().monitorx;
if (has_waitpkg) { if (has_waitpkg) {
TPAUSE(); TPAUSE();
} else if (has_monitorx) {
MWAITX();
} else { } else {
std::this_thread::yield(); std::this_thread::yield();
} }