x64: Add MicroSleep
MicroSleep allows the processor to pause for a "short" amount of time (in the microsecond range). This is useful for spin-waiting that does not require nanosecond precision. This uses the new TPAUSE instruction introduced on Intel's newest processors as part of the waitpkg instructions. For CPUs that do not support waitpkg instructions, this is equivalent to yield(). Co-Authored-By: liamwhite <liamwhite@users.noreply.github.com>
This commit is contained in:
parent
d2cfe25b07
commit
27c33ab73f
|
@ -160,6 +160,8 @@ if(ARCHITECTURE_x86_64)
|
||||||
PRIVATE
|
PRIVATE
|
||||||
x64/cpu_detect.cpp
|
x64/cpu_detect.cpp
|
||||||
x64/cpu_detect.h
|
x64/cpu_detect.h
|
||||||
|
x64/cpu_wait.cpp
|
||||||
|
x64/cpu_wait.h
|
||||||
x64/native_clock.cpp
|
x64/native_clock.cpp
|
||||||
x64/native_clock.h
|
x64/native_clock.h
|
||||||
x64/xbyak_abi.h
|
x64/xbyak_abi.h
|
||||||
|
|
72
src/common/x64/cpu_wait.cpp
Normal file
72
src/common/x64/cpu_wait.cpp
Normal file
|
@ -0,0 +1,72 @@
|
||||||
|
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
|
||||||
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
|
||||||
|
#include <thread>
|
||||||
|
|
||||||
|
#ifdef _MSC_VER
|
||||||
|
#include <intrin.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include "common/x64/cpu_detect.h"
|
||||||
|
#include "common/x64/cpu_wait.h"
|
||||||
|
|
||||||
|
namespace Common::X64 {
|
||||||
|
|
||||||
|
#ifdef _MSC_VER
|
||||||
|
/// Reads the processor time-stamp counter (MSVC build).
///
/// The read is bracketed by an LFENCE on each side, and every LFENCE is followed by
/// _ReadWriteBarrier() so that the compiler does not move memory accesses across it.
///
/// @return The 64-bit value produced by __rdtsc().
__forceinline static u64 FencedRDTSC() {
    // Fence before sampling the counter.
    _mm_lfence();
    _ReadWriteBarrier();

    const u64 tsc = __rdtsc();

    // Fence again after sampling the counter.
    _mm_lfence();
    _ReadWriteBarrier();

    return tsc;
}
|
||||||
|
|
||||||
|
/// Issues a single TPAUSE (MSVC build) whose wake-up deadline lies PauseCycles
/// time-stamp-counter ticks after the current fenced TSC reading.
__forceinline static void TPAUSE() {
    // Waiting 100,000 cycles is a reasonable trade-off for saving CPU resources.
    // What that means in wall-clock time:
    //   1 GHz -> 100us
    //   2 GHz ->  50us
    //   4 GHz ->  25us
    static constexpr auto PauseCycles = 100'000;

    // First argument of _tpause is the control value. Per the Intel SDM, leaving
    // bit 0 clear selects the C0.2 optimized wait state.
    static constexpr auto RequestC02State = 0U;

    // Absolute TSC value at which the processor should resume.
    const auto wake_tsc = FencedRDTSC() + PauseCycles;
    _tpause(RequestC02State, wake_tsc);
}
|
||||||
|
#else
|
||||||
|
/// Reads the processor time-stamp counter (GCC/Clang build).
///
/// The instruction sequence is LFENCE, RDTSC, merge, LFENCE. RDTSC leaves the low
/// half of the counter in EAX and the high half in EDX; the SHL/OR pair folds the
/// two halves into RAX, which is bound to the returned variable.
///
/// @return The combined 64-bit counter value.
static u64 FencedRDTSC() {
    u64 tsc;
    asm volatile("lfence\n\t"
                 "rdtsc\n\t"
                 "shl $32, %%rdx\n\t"
                 "or %%rdx, %[tsc]\n\t"
                 "lfence"
                 : [tsc] "=a"(tsc)
                 : // no inputs
                 // RDX is used as scratch; "memory" stops the compiler from moving
                 // memory accesses across the statement; SHL/OR modify the flags.
                 : "rdx", "memory", "cc");
    return tsc;
}
|
||||||
|
|
||||||
|
static void TPAUSE() {
|
||||||
|
// 100,000 cycles is a reasonable amount of time to wait to save on CPU resources.
|
||||||
|
// For reference:
|
||||||
|
// At 1 GHz, 100K cycles is 100us
|
||||||
|
// At 2 GHz, 100K cycles is 50us
|
||||||
|
// At 4 GHz, 100K cycles is 25us
|
||||||
|
static constexpr auto PauseCycles = 100'000;
|
||||||
|
const auto tsc = FencedRDTSC() + PauseCycles;
|
||||||
|
const auto eax = static_cast<u32>(tsc & 0xFFFFFFFF);
|
||||||
|
const auto edx = static_cast<u32>(tsc >> 32);
|
||||||
|
asm volatile("tpause %0" : : "r"(0), "d"(edx), "a"(eax));
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
void MicroSleep() {
|
||||||
|
static const bool has_waitpkg = GetCPUCaps().waitpkg;
|
||||||
|
|
||||||
|
if (has_waitpkg) {
|
||||||
|
TPAUSE();
|
||||||
|
} else {
|
||||||
|
std::this_thread::yield();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace Common::X64
|
10
src/common/x64/cpu_wait.h
Normal file
10
src/common/x64/cpu_wait.h
Normal file
|
@ -0,0 +1,10 @@
|
||||||
|
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
|
||||||
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
namespace Common::X64 {
|
||||||
|
|
||||||
|
/// Pauses the calling thread for a short amount of time (microsecond range), intended
/// for spin-waiting that does not need nanosecond precision.
///
/// On CPUs that report the waitpkg capability this executes TPAUSE with a deadline
/// 100,000 TSC cycles ahead; on all other CPUs it is equivalent to
/// std::this_thread::yield().
void MicroSleep();
|
||||||
|
|
||||||
|
} // namespace Common::X64
|
Loading…
Reference in a new issue