From 5b7ec71fb7cc81ea0f98f019cb0dac0b6bcb2fa2 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Fri, 15 Feb 2019 13:15:28 -0400
Subject: [PATCH 1/4] Correct CNTPCT to use Clock Cycles instead of Cpu Cycles.

---
 src/core/arm/dynarmic/arm_dynarmic.cpp | 5 +++--
 src/core/core_timing_util.cpp          | 7 +++++++
 src/core/core_timing_util.h            | 3 +++
 3 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/src/core/arm/dynarmic/arm_dynarmic.cpp b/src/core/arm/dynarmic/arm_dynarmic.cpp
index 9b7ca4030..d36538257 100644
--- a/src/core/arm/dynarmic/arm_dynarmic.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic.cpp
@@ -12,6 +12,7 @@
 #include "core/core.h"
 #include "core/core_cpu.h"
 #include "core/core_timing.h"
+#include "core/core_timing_util.h"
 #include "core/gdbstub/gdbstub.h"
 #include "core/hle/kernel/process.h"
 #include "core/hle/kernel/svc.h"
@@ -119,7 +120,7 @@ public:
         return std::max(parent.core_timing.GetDowncount(), 0);
     }
     u64 GetCNTPCT() override {
-        return parent.core_timing.GetTicks();
+        return CpuCyclesToClockCycles(parent.core_timing.GetTicks());
     }
 
     ARM_Dynarmic& parent;
@@ -151,7 +152,7 @@ std::unique_ptr<Dynarmic::A64::Jit> ARM_Dynarmic::MakeJit() const {
     config.tpidr_el0 = &cb->tpidr_el0;
     config.dczid_el0 = 4;
     config.ctr_el0 = 0x8444c004;
-    config.cntfrq_el0 = 19200000; // Value from fusee.
+    config.cntfrq_el0 = Timing::CNTFREQ; // Value from fusee.
 
     // Unpredictable instructions
     config.define_unpredictable_behaviour = true;
diff --git a/src/core/core_timing_util.cpp b/src/core/core_timing_util.cpp
index 88ff70233..8fc92560a 100644
--- a/src/core/core_timing_util.cpp
+++ b/src/core/core_timing_util.cpp
@@ -60,4 +60,11 @@ s64 nsToCycles(u64 ns) {
     return (BASE_CLOCK_RATE * static_cast<s64>(ns)) / 1000000000;
 }
 
+u64 CpuCyclesToClockCycles(u64 ticks) {
+    u64 result = ticks;
+    result *= CNTFREQ;
+    result /= BASE_CLOCK_RATE;
+    return static_cast<u64>(result);
+}
+
 } // namespace Core::Timing
diff --git a/src/core/core_timing_util.h b/src/core/core_timing_util.h
index 513cfac1b..545d3a260 100644
--- a/src/core/core_timing_util.h
+++ b/src/core/core_timing_util.h
@@ -11,6 +11,7 @@ namespace Core::Timing {
 // The below clock rate is based on Switch's clockspeed being widely known as 1.020GHz
 // The exact value used is of course unverified.
 constexpr u64 BASE_CLOCK_RATE = 1019215872; // Switch clock speed is 1020MHz un/docked
+constexpr u64 CNTFREQ = 19200000;  // Value from fusee.
 
 inline s64 msToCycles(int ms) {
     // since ms is int there is no way to overflow
@@ -61,4 +62,6 @@ inline u64 cyclesToMs(s64 cycles) {
     return cycles * 1000 / BASE_CLOCK_RATE;
 }
 
+u64 CpuCyclesToClockCycles(u64 ticks);
+
 } // namespace Core::Timing

From 3ea48e8ebe25686f2342cd79b32409fcd1bccb28 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Fri, 15 Feb 2019 19:26:41 -0400
Subject: [PATCH 2/4] Implement 128-bit Unsigned Integer Multiplication and
 Division.

---
 src/common/CMakeLists.txt |  2 ++
 src/common/uint128.cpp    | 18 ++++++++++++++++++
 src/common/uint128.h      | 30 ++++++++++++++++++++++++++++++
 3 files changed, 50 insertions(+)
 create mode 100644 src/common/uint128.cpp
 create mode 100644 src/common/uint128.h

diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index bdd885273..b0174b445 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -113,6 +113,8 @@ add_library(common STATIC
     threadsafe_queue.h
     timer.cpp
     timer.h
+    uint128.cpp
+    uint128.h
     vector_math.h
     web_result.h
 )
diff --git a/src/common/uint128.cpp b/src/common/uint128.cpp
new file mode 100644
index 000000000..aea7f03e2
--- /dev/null
+++ b/src/common/uint128.cpp
@@ -0,0 +1,18 @@
+
+namespace Common {
+
+std::pair<u64, u64> udiv128(u128 dividend, u64 divisor) {
+    u64 remainder = dividend[0] % divisor;
+    u64 accum = dividend[0] / divisor;
+    if (dividend[1] == 0)
+        return {accum, remainder};
+    // We ignore dividend[1] / divisor as that overflows
+    u64 first_segment = (dividend[1] % divisor) << 32;
+    accum += (first_segment / divisor) << 32;
+    u64 second_segment = (first_segment % divisor) << 32;
+    accum += (second_segment / divisor);
+    remainder += second_segment % divisor;
+    return {accum, remainder};
+}
+
+} // namespace Common
diff --git a/src/common/uint128.h b/src/common/uint128.h
new file mode 100644
index 000000000..fda313bcc
--- /dev/null
+++ b/src/common/uint128.h
@@ -0,0 +1,30 @@
+#include <array>
+#include <cstdint>
+#include <utility>
+#include <cstring>
+#include "common/common_types.h"
+
+namespace Common {
+
+#ifdef _MSC_VER
+#include <intrin.h>
+
+#pragma intrinsic(_umul128)
+#endif
+
+inline u128 umul128(u64 a, u64 b) {
+#ifdef _MSC_VER
+u128 result;
+result[0] = _umul128(a, b, &result[1]);
+#else
+unsigned __int128 tmp = a;
+tmp *= b;
+u128 result;
+std::memcpy(&result, &tmp, sizeof(u128));
+#endif
+return result;
+}
+
+std::pair<u64, u64> udiv128(u128 dividend, u64 divisor);
+
+} // namespace Common

From ecccfe033777d6ae7d29bcf0cfc30412f7d3be24 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Fri, 15 Feb 2019 20:04:11 -0400
Subject: [PATCH 3/4] Use u128 on Clock Cycles calculation.

---
 src/common/uint128.cpp                 | 24 +++++++++++++++++++++++-
 src/common/uint128.h                   | 23 +++--------------------
 src/core/arm/dynarmic/arm_dynarmic.cpp |  2 +-
 src/core/core_timing_util.cpp          |  8 ++++----
 src/core/core_timing_util.h            |  2 +-
 5 files changed, 32 insertions(+), 27 deletions(-)

diff --git a/src/common/uint128.cpp b/src/common/uint128.cpp
index aea7f03e2..8548ba808 100644
--- a/src/common/uint128.cpp
+++ b/src/common/uint128.cpp
@@ -1,7 +1,25 @@
+#ifdef _MSC_VER
+#include <intrin.h>
+
+#pragma intrinsic(_umul128)
+#endif
+#include "common/uint128.h"
 
 namespace Common {
+u128 Multiply64Into128(u64 a, u64 b) {
+#ifdef _MSC_VER
+    u128 result;
+    result[0] = _umul128(a, b, &result[1]);
+#else
+    unsigned __int128 tmp = a;
+    tmp *= b;
+    u128 result;
+    std::memcpy(&result, &tmp, sizeof(u128));
+#endif
+    return result;
+}
 
-std::pair<u64, u64> udiv128(u128 dividend, u64 divisor) {
+std::pair<u64, u64> Divide128On64(u128 dividend, u64 divisor) {
     u64 remainder = dividend[0] % divisor;
     u64 accum = dividend[0] / divisor;
     if (dividend[1] == 0)
@@ -12,6 +30,10 @@ std::pair<u64, u64> udiv128(u128 dividend, u64 divisor) {
     u64 second_segment = (first_segment % divisor) << 32;
     accum += (second_segment / divisor);
     remainder += second_segment % divisor;
+    if (remainder >= divisor) {
+        accum++;
+        remainder -= divisor;
+    }
     return {accum, remainder};
 }
 
diff --git a/src/common/uint128.h b/src/common/uint128.h
index fda313bcc..45e384c33 100644
--- a/src/common/uint128.h
+++ b/src/common/uint128.h
@@ -1,30 +1,13 @@
 #include <array>
 #include <cstdint>
-#include <utility>
 #include <cstring>
+#include <utility>
 #include "common/common_types.h"
 
 namespace Common {
 
-#ifdef _MSC_VER
-#include <intrin.h>
+u128 Multiply64Into128(u64 a, u64 b);
 
-#pragma intrinsic(_umul128)
-#endif
-
-inline u128 umul128(u64 a, u64 b) {
-#ifdef _MSC_VER
-u128 result;
-result[0] = _umul128(a, b, &result[1]);
-#else
-unsigned __int128 tmp = a;
-tmp *= b;
-u128 result;
-std::memcpy(&result, &tmp, sizeof(u128));
-#endif
-return result;
-}
-
-std::pair<u64, u64> udiv128(u128 dividend, u64 divisor);
+std::pair<u64, u64> Divide128On64(u128 dividend, u64 divisor);
 
 } // namespace Common
diff --git a/src/core/arm/dynarmic/arm_dynarmic.cpp b/src/core/arm/dynarmic/arm_dynarmic.cpp
index d36538257..25f76259b 100644
--- a/src/core/arm/dynarmic/arm_dynarmic.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic.cpp
@@ -120,7 +120,7 @@ public:
         return std::max(parent.core_timing.GetDowncount(), 0);
     }
     u64 GetCNTPCT() override {
-        return CpuCyclesToClockCycles(parent.core_timing.GetTicks());
+        return Timing::CpuCyclesToClockCycles(parent.core_timing.GetTicks());
     }
 
     ARM_Dynarmic& parent;
diff --git a/src/core/core_timing_util.cpp b/src/core/core_timing_util.cpp
index 8fc92560a..aab4aa697 100644
--- a/src/core/core_timing_util.cpp
+++ b/src/core/core_timing_util.cpp
@@ -7,6 +7,7 @@
 #include <cinttypes>
 #include <limits>
 #include "common/logging/log.h"
+#include "common/uint128.h"
 
 namespace Core::Timing {
 
@@ -61,10 +62,9 @@ s64 nsToCycles(u64 ns) {
 }
 
 u64 CpuCyclesToClockCycles(u64 ticks) {
-    u64 result = ticks;
-    result *= CNTFREQ;
-    result /= BASE_CLOCK_RATE;
-    return static_cast<u64>(result);
+    u128 temporal = Common::Multiply64Into128(ticks, CNTFREQ);
+    std::pair<u64, u64> result = Common::Divide128On64(temporal, BASE_CLOCK_RATE);
+    return result.first;
 }
 
 } // namespace Core::Timing
diff --git a/src/core/core_timing_util.h b/src/core/core_timing_util.h
index 545d3a260..679aa3123 100644
--- a/src/core/core_timing_util.h
+++ b/src/core/core_timing_util.h
@@ -11,7 +11,7 @@ namespace Core::Timing {
 // The below clock rate is based on Switch's clockspeed being widely known as 1.020GHz
 // The exact value used is of course unverified.
 constexpr u64 BASE_CLOCK_RATE = 1019215872; // Switch clock speed is 1020MHz un/docked
-constexpr u64 CNTFREQ = 19200000;  // Value from fusee.
+constexpr u64 CNTFREQ = 19200000;           // Value from fusee.
 
 inline s64 msToCycles(int ms) {
     // since ms is int there is no way to overflow

From a8d4927e29bb1acbf5f3267f368801847acd4222 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Sat, 16 Feb 2019 16:52:24 -0400
Subject: [PATCH 4/4] Corrections, documenting and fixes.

---
 src/common/uint128.cpp                 | 11 ++++++-----
 src/common/uint128.h                   |  9 +++++----
 src/core/arm/dynarmic/arm_dynarmic.cpp |  2 +-
 src/core/core_timing_util.cpp          |  5 ++---
 4 files changed, 14 insertions(+), 13 deletions(-)

diff --git a/src/common/uint128.cpp b/src/common/uint128.cpp
index 8548ba808..2238a52c5 100644
--- a/src/common/uint128.cpp
+++ b/src/common/uint128.cpp
@@ -3,31 +3,32 @@
 
 #pragma intrinsic(_umul128)
 #endif
+#include <cstring>
 #include "common/uint128.h"
 
 namespace Common {
+
 u128 Multiply64Into128(u64 a, u64 b) {
-#ifdef _MSC_VER
     u128 result;
+#ifdef _MSC_VER
     result[0] = _umul128(a, b, &result[1]);
 #else
     unsigned __int128 tmp = a;
     tmp *= b;
-    u128 result;
     std::memcpy(&result, &tmp, sizeof(u128));
 #endif
     return result;
 }
 
-std::pair<u64, u64> Divide128On64(u128 dividend, u64 divisor) {
+std::pair<u64, u64> Divide128On32(u128 dividend, u32 divisor) {
     u64 remainder = dividend[0] % divisor;
     u64 accum = dividend[0] / divisor;
     if (dividend[1] == 0)
         return {accum, remainder};
     // We ignore dividend[1] / divisor as that overflows
-    u64 first_segment = (dividend[1] % divisor) << 32;
+    const u64 first_segment = (dividend[1] % divisor) << 32;
     accum += (first_segment / divisor) << 32;
-    u64 second_segment = (first_segment % divisor) << 32;
+    const u64 second_segment = (first_segment % divisor) << 32;
     accum += (second_segment / divisor);
     remainder += second_segment % divisor;
     if (remainder >= divisor) {
diff --git a/src/common/uint128.h b/src/common/uint128.h
index 45e384c33..52e6b46eb 100644
--- a/src/common/uint128.h
+++ b/src/common/uint128.h
@@ -1,13 +1,14 @@
-#include <array>
-#include <cstdint>
-#include <cstring>
+#pragma once
 #include <utility>
 #include "common/common_types.h"
 
 namespace Common {
 
+// This function multiplies two u64 values and produces the full 128-bit product as a u128.
 u128 Multiply64Into128(u64 a, u64 b);
 
-std::pair<u64, u64> Divide128On64(u128 dividend, u64 divisor);
+// This function divides a u128 by a non-zero u32 and produces two u64 values: the quotient
+// (low 64 bits only; dividend[1] / divisor is discarded as it would overflow) and the remainder.
+std::pair<u64, u64> Divide128On32(u128 dividend, u32 divisor);
 
 } // namespace Common
diff --git a/src/core/arm/dynarmic/arm_dynarmic.cpp b/src/core/arm/dynarmic/arm_dynarmic.cpp
index 25f76259b..4fdc12f11 100644
--- a/src/core/arm/dynarmic/arm_dynarmic.cpp
+++ b/src/core/arm/dynarmic/arm_dynarmic.cpp
@@ -152,7 +152,7 @@ std::unique_ptr<Dynarmic::A64::Jit> ARM_Dynarmic::MakeJit() const {
     config.tpidr_el0 = &cb->tpidr_el0;
     config.dczid_el0 = 4;
     config.ctr_el0 = 0x8444c004;
-    config.cntfrq_el0 = Timing::CNTFREQ; // Value from fusee.
+    config.cntfrq_el0 = Timing::CNTFREQ;
 
     // Unpredictable instructions
     config.define_unpredictable_behaviour = true;
diff --git a/src/core/core_timing_util.cpp b/src/core/core_timing_util.cpp
index aab4aa697..7942f30d6 100644
--- a/src/core/core_timing_util.cpp
+++ b/src/core/core_timing_util.cpp
@@ -62,9 +62,8 @@ s64 nsToCycles(u64 ns) {
 }
 
 u64 CpuCyclesToClockCycles(u64 ticks) {
-    u128 temporal = Common::Multiply64Into128(ticks, CNTFREQ);
-    std::pair<u64, u64> result = Common::Divide128On64(temporal, BASE_CLOCK_RATE);
-    return result.first;
+    const u128 temporal = Common::Multiply64Into128(ticks, CNTFREQ);
+    return Common::Divide128On32(temporal, static_cast<u32>(BASE_CLOCK_RATE)).first;
 }
 
 } // namespace Core::Timing