X86/NativeClock: Reimplement RDTSC access to be lock-free.
This commit is contained in:
parent
d4f871cb6a
commit
53d92318b8
|
@ -98,7 +98,6 @@ add_library(common STATIC
|
||||||
algorithm.h
|
algorithm.h
|
||||||
alignment.h
|
alignment.h
|
||||||
assert.h
|
assert.h
|
||||||
atomic_ops.cpp
|
|
||||||
atomic_ops.h
|
atomic_ops.h
|
||||||
detached_tasks.cpp
|
detached_tasks.cpp
|
||||||
detached_tasks.h
|
detached_tasks.h
|
||||||
|
|
|
@ -1,75 +0,0 @@
|
||||||
// Copyright 2020 yuzu Emulator Project
|
|
||||||
// Licensed under GPLv2 or any later version
|
|
||||||
// Refer to the license.txt file included.
|
|
||||||
|
|
||||||
#include <cstring>
|
|
||||||
|
|
||||||
#include "common/atomic_ops.h"
|
|
||||||
|
|
||||||
#if _MSC_VER
|
|
||||||
#include <intrin.h>
|
|
||||||
#endif
|
|
||||||
|
|
||||||
namespace Common {
|
|
||||||
|
|
||||||
#if _MSC_VER
|
|
||||||
|
|
||||||
bool AtomicCompareAndSwap(volatile u8* pointer, u8 value, u8 expected) {
|
|
||||||
const u8 result =
|
|
||||||
_InterlockedCompareExchange8(reinterpret_cast<volatile char*>(pointer), value, expected);
|
|
||||||
return result == expected;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool AtomicCompareAndSwap(volatile u16* pointer, u16 value, u16 expected) {
|
|
||||||
const u16 result =
|
|
||||||
_InterlockedCompareExchange16(reinterpret_cast<volatile short*>(pointer), value, expected);
|
|
||||||
return result == expected;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool AtomicCompareAndSwap(volatile u32* pointer, u32 value, u32 expected) {
|
|
||||||
const u32 result =
|
|
||||||
_InterlockedCompareExchange(reinterpret_cast<volatile long*>(pointer), value, expected);
|
|
||||||
return result == expected;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool AtomicCompareAndSwap(volatile u64* pointer, u64 value, u64 expected) {
|
|
||||||
const u64 result = _InterlockedCompareExchange64(reinterpret_cast<volatile __int64*>(pointer),
|
|
||||||
value, expected);
|
|
||||||
return result == expected;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool AtomicCompareAndSwap(volatile u64* pointer, u128 value, u128 expected) {
|
|
||||||
return _InterlockedCompareExchange128(reinterpret_cast<volatile __int64*>(pointer), value[1],
|
|
||||||
value[0],
|
|
||||||
reinterpret_cast<__int64*>(expected.data())) != 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
#else
|
|
||||||
|
|
||||||
bool AtomicCompareAndSwap(volatile u8* pointer, u8 value, u8 expected) {
|
|
||||||
return __sync_bool_compare_and_swap(pointer, expected, value);
|
|
||||||
}
|
|
||||||
|
|
||||||
bool AtomicCompareAndSwap(volatile u16* pointer, u16 value, u16 expected) {
|
|
||||||
return __sync_bool_compare_and_swap(pointer, expected, value);
|
|
||||||
}
|
|
||||||
|
|
||||||
bool AtomicCompareAndSwap(volatile u32* pointer, u32 value, u32 expected) {
|
|
||||||
return __sync_bool_compare_and_swap(pointer, expected, value);
|
|
||||||
}
|
|
||||||
|
|
||||||
bool AtomicCompareAndSwap(volatile u64* pointer, u64 value, u64 expected) {
|
|
||||||
return __sync_bool_compare_and_swap(pointer, expected, value);
|
|
||||||
}
|
|
||||||
|
|
||||||
bool AtomicCompareAndSwap(volatile u64* pointer, u128 value, u128 expected) {
|
|
||||||
unsigned __int128 value_a;
|
|
||||||
unsigned __int128 expected_a;
|
|
||||||
std::memcpy(&value_a, value.data(), sizeof(u128));
|
|
||||||
std::memcpy(&expected_a, expected.data(), sizeof(u128));
|
|
||||||
return __sync_bool_compare_and_swap((unsigned __int128*)pointer, expected_a, value_a);
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
} // namespace Common
|
|
|
@ -4,14 +4,75 @@
|
||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
|
#include <cstring>
|
||||||
|
#include <memory>
|
||||||
|
|
||||||
#include "common/common_types.h"
|
#include "common/common_types.h"
|
||||||
|
|
||||||
|
#if _MSC_VER
|
||||||
|
#include <intrin.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
namespace Common {
|
namespace Common {
|
||||||
|
|
||||||
[[nodiscard]] bool AtomicCompareAndSwap(volatile u8* pointer, u8 value, u8 expected);
|
#if _MSC_VER
|
||||||
[[nodiscard]] bool AtomicCompareAndSwap(volatile u16* pointer, u16 value, u16 expected);
|
|
||||||
[[nodiscard]] bool AtomicCompareAndSwap(volatile u32* pointer, u32 value, u32 expected);
|
[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u8* pointer, u8 value, u8 expected) {
|
||||||
[[nodiscard]] bool AtomicCompareAndSwap(volatile u64* pointer, u64 value, u64 expected);
|
const u8 result =
|
||||||
[[nodiscard]] bool AtomicCompareAndSwap(volatile u64* pointer, u128 value, u128 expected);
|
_InterlockedCompareExchange8(reinterpret_cast<volatile char*>(pointer), value, expected);
|
||||||
|
return result == expected;
|
||||||
|
}
|
||||||
|
|
||||||
|
[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u16* pointer, u16 value, u16 expected) {
|
||||||
|
const u16 result =
|
||||||
|
_InterlockedCompareExchange16(reinterpret_cast<volatile short*>(pointer), value, expected);
|
||||||
|
return result == expected;
|
||||||
|
}
|
||||||
|
|
||||||
|
[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u32* pointer, u32 value, u32 expected) {
|
||||||
|
const u32 result =
|
||||||
|
_InterlockedCompareExchange(reinterpret_cast<volatile long*>(pointer), value, expected);
|
||||||
|
return result == expected;
|
||||||
|
}
|
||||||
|
|
||||||
|
[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u64* pointer, u64 value, u64 expected) {
|
||||||
|
const u64 result = _InterlockedCompareExchange64(reinterpret_cast<volatile __int64*>(pointer),
|
||||||
|
value, expected);
|
||||||
|
return result == expected;
|
||||||
|
}
|
||||||
|
|
||||||
|
[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u64* pointer, u128 value, u128 expected) {
|
||||||
|
return _InterlockedCompareExchange128(reinterpret_cast<volatile __int64*>(pointer), value[1],
|
||||||
|
value[0],
|
||||||
|
reinterpret_cast<__int64*>(expected.data())) != 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u8* pointer, u8 value, u8 expected) {
|
||||||
|
return __sync_bool_compare_and_swap(pointer, expected, value);
|
||||||
|
}
|
||||||
|
|
||||||
|
[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u16* pointer, u16 value, u16 expected) {
|
||||||
|
return __sync_bool_compare_and_swap(pointer, expected, value);
|
||||||
|
}
|
||||||
|
|
||||||
|
[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u32* pointer, u32 value, u32 expected) {
|
||||||
|
return __sync_bool_compare_and_swap(pointer, expected, value);
|
||||||
|
}
|
||||||
|
|
||||||
|
[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u64* pointer, u64 value, u64 expected) {
|
||||||
|
return __sync_bool_compare_and_swap(pointer, expected, value);
|
||||||
|
}
|
||||||
|
|
||||||
|
[[nodiscard]] inline bool AtomicCompareAndSwap(volatile u64* pointer, u128 value, u128 expected) {
|
||||||
|
unsigned __int128 value_a;
|
||||||
|
unsigned __int128 expected_a;
|
||||||
|
std::memcpy(&value_a, value.data(), sizeof(u128));
|
||||||
|
std::memcpy(&expected_a, expected.data(), sizeof(u128));
|
||||||
|
return __sync_bool_compare_and_swap((unsigned __int128*)pointer, expected_a, value_a);
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
} // namespace Common
|
} // namespace Common
|
||||||
|
|
|
@ -17,6 +17,7 @@
|
||||||
#include <x86intrin.h>
|
#include <x86intrin.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#include "common/atomic_ops.h"
|
||||||
#include "common/uint128.h"
|
#include "common/uint128.h"
|
||||||
#include "common/x64/native_clock.h"
|
#include "common/x64/native_clock.h"
|
||||||
|
|
||||||
|
@ -102,8 +103,8 @@ NativeClock::NativeClock(u64 emulated_cpu_frequency_, u64 emulated_clock_frequen
|
||||||
: WallClock(emulated_cpu_frequency_, emulated_clock_frequency_, true), rtsc_frequency{
|
: WallClock(emulated_cpu_frequency_, emulated_clock_frequency_, true), rtsc_frequency{
|
||||||
rtsc_frequency_} {
|
rtsc_frequency_} {
|
||||||
_mm_mfence();
|
_mm_mfence();
|
||||||
last_measure = __rdtsc();
|
time_point.inner.last_measure = __rdtsc();
|
||||||
accumulated_ticks = 0U;
|
time_point.inner.accumulated_ticks = 0U;
|
||||||
ns_rtsc_factor = GetFixedPoint64Factor(1000000000, rtsc_frequency);
|
ns_rtsc_factor = GetFixedPoint64Factor(1000000000, rtsc_frequency);
|
||||||
us_rtsc_factor = GetFixedPoint64Factor(1000000, rtsc_frequency);
|
us_rtsc_factor = GetFixedPoint64Factor(1000000, rtsc_frequency);
|
||||||
ms_rtsc_factor = GetFixedPoint64Factor(1000, rtsc_frequency);
|
ms_rtsc_factor = GetFixedPoint64Factor(1000, rtsc_frequency);
|
||||||
|
@ -112,23 +113,35 @@ NativeClock::NativeClock(u64 emulated_cpu_frequency_, u64 emulated_clock_frequen
|
||||||
}
|
}
|
||||||
|
|
||||||
u64 NativeClock::GetRTSC() {
|
u64 NativeClock::GetRTSC() {
|
||||||
std::scoped_lock scope{rtsc_serialize};
|
TimePoint new_time_point{};
|
||||||
_mm_mfence();
|
TimePoint current_time_point{};
|
||||||
const u64 current_measure = __rdtsc();
|
do {
|
||||||
u64 diff = current_measure - last_measure;
|
current_time_point.pack = time_point.pack;
|
||||||
diff = diff & ~static_cast<u64>(static_cast<s64>(diff) >> 63); // max(diff, 0)
|
_mm_mfence();
|
||||||
if (current_measure > last_measure) {
|
const u64 current_measure = __rdtsc();
|
||||||
last_measure = current_measure;
|
u64 diff = current_measure - current_time_point.inner.last_measure;
|
||||||
}
|
diff = diff & ~static_cast<u64>(static_cast<s64>(diff) >> 63); // max(diff, 0)
|
||||||
accumulated_ticks += diff;
|
new_time_point.inner.last_measure = current_measure > current_time_point.inner.last_measure
|
||||||
|
? current_measure
|
||||||
|
: current_time_point.inner.last_measure;
|
||||||
|
new_time_point.inner.accumulated_ticks = current_time_point.inner.accumulated_ticks + diff;
|
||||||
|
} while (!Common::AtomicCompareAndSwap(time_point.pack.data(), new_time_point.pack,
|
||||||
|
current_time_point.pack));
|
||||||
/// The clock cannot be more precise than the guest timer, remove the lower bits
|
/// The clock cannot be more precise than the guest timer, remove the lower bits
|
||||||
return accumulated_ticks & inaccuracy_mask;
|
return new_time_point.inner.accumulated_ticks & inaccuracy_mask;
|
||||||
}
|
}
|
||||||
|
|
||||||
void NativeClock::Pause(bool is_paused) {
|
void NativeClock::Pause(bool is_paused) {
|
||||||
if (!is_paused) {
|
if (!is_paused) {
|
||||||
_mm_mfence();
|
TimePoint current_time_point{};
|
||||||
last_measure = __rdtsc();
|
TimePoint new_time_point{};
|
||||||
|
do {
|
||||||
|
current_time_point.pack = time_point.pack;
|
||||||
|
new_time_point.pack = current_time_point.pack;
|
||||||
|
_mm_mfence();
|
||||||
|
new_time_point.inner.last_measure = __rdtsc();
|
||||||
|
} while (!Common::AtomicCompareAndSwap(time_point.pack.data(), new_time_point.pack,
|
||||||
|
current_time_point.pack));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -6,7 +6,6 @@
|
||||||
|
|
||||||
#include <optional>
|
#include <optional>
|
||||||
|
|
||||||
#include "common/spin_lock.h"
|
|
||||||
#include "common/wall_clock.h"
|
#include "common/wall_clock.h"
|
||||||
|
|
||||||
namespace Common {
|
namespace Common {
|
||||||
|
@ -32,22 +31,29 @@ public:
|
||||||
private:
|
private:
|
||||||
u64 GetRTSC();
|
u64 GetRTSC();
|
||||||
|
|
||||||
|
union alignas(16) TimePoint {
|
||||||
|
TimePoint() : pack{} {}
|
||||||
|
u128 pack{};
|
||||||
|
struct Inner {
|
||||||
|
u64 last_measure{};
|
||||||
|
u64 accumulated_ticks{};
|
||||||
|
} inner;
|
||||||
|
};
|
||||||
|
|
||||||
/// value used to reduce the native clock's accuracy as some apps rely on
|
/// value used to reduce the native clock's accuracy as some apps rely on
|
||||||
/// undefined behavior where the level of accuracy in the clock shouldn't
|
/// undefined behavior where the level of accuracy in the clock shouldn't
|
||||||
/// be higher.
|
/// be higher.
|
||||||
static constexpr u64 inaccuracy_mask = ~(UINT64_C(0x400) - 1);
|
static constexpr u64 inaccuracy_mask = ~(UINT64_C(0x400) - 1);
|
||||||
|
|
||||||
SpinLock rtsc_serialize{};
|
TimePoint time_point;
|
||||||
u64 last_measure{};
|
|
||||||
u64 accumulated_ticks{};
|
|
||||||
u64 rtsc_frequency;
|
|
||||||
|
|
||||||
// factors
|
// factors
|
||||||
|
u64 clock_rtsc_factor{};
|
||||||
|
u64 cpu_rtsc_factor{};
|
||||||
u64 ns_rtsc_factor{};
|
u64 ns_rtsc_factor{};
|
||||||
u64 us_rtsc_factor{};
|
u64 us_rtsc_factor{};
|
||||||
u64 ms_rtsc_factor{};
|
u64 ms_rtsc_factor{};
|
||||||
u64 clock_rtsc_factor{};
|
|
||||||
u64 cpu_rtsc_factor{};
|
u64 rtsc_frequency;
|
||||||
};
|
};
|
||||||
} // namespace X64
|
} // namespace X64
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue