From 82b829625b89a706dd0d867c529f533fe928710c Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Fri, 7 Jun 2019 12:56:30 -0400
Subject: [PATCH] video_core: Implement GPU side Syncpoints

---
 .../hle/service/nvdrv/devices/nvhost_gpu.cpp  | 30 +++++++++++++++----
 .../hle/service/nvdrv/devices/nvhost_gpu.h    |  8 ++++-
 src/core/hle/service/nvdrv/nvdata.h           |  2 ++
 src/video_core/engines/maxwell_3d.cpp         |  5 ++--
 src/video_core/gpu.cpp                        | 24 +++++++++++++++
 src/video_core/gpu.h                          | 24 +++++++++++++++
 6 files changed, 84 insertions(+), 9 deletions(-)
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
index 8a53eddb1..9d1107594 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
@@ -143,7 +143,7 @@ u32 nvhost_gpu::SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& outp
     IoctlSubmitGpfifo params{};
     std::memcpy(&params, input.data(), sizeof(IoctlSubmitGpfifo));
     LOG_WARNING(Service_NVDRV, "(STUBBED) called, gpfifo={:X}, num_entries={:X}, flags={:X}",
-                params.address, params.num_entries, params.flags);
+                params.address, params.num_entries, params.flags.raw);
 
     ASSERT_MSG(input.size() == sizeof(IoctlSubmitGpfifo) +
                                    params.num_entries * sizeof(Tegra::CommandListHeader),
@@ -153,7 +153,17 @@ u32 nvhost_gpu::SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& outp
     std::memcpy(entries.data(), &input[sizeof(IoctlSubmitGpfifo)],
                 params.num_entries * sizeof(Tegra::CommandListHeader));
 
-    Core::System::GetInstance().GPU().PushGPUEntries(std::move(entries));
+    UNIMPLEMENTED_IF(params.flags.add_wait.Value() != 0);
+    UNIMPLEMENTED_IF(params.flags.add_increment.Value() != 0);
+
+    auto& gpu = Core::System::GetInstance().GPU();
+    u32 current_syncpoint_value = gpu.GetSyncpointValue(params.fence_out.id);
+    if (params.flags.increment.Value()) {
+        params.fence_out.value += current_syncpoint_value;
+    } else {
+        params.fence_out.value = current_syncpoint_value;
+    }
+    gpu.PushGPUEntries(std::move(entries));
 
     // TODO(Blinkhawk): Figure how thoios fence is set
     // params.fence_out.value = 0;
@@ -168,16 +178,24 @@ u32 nvhost_gpu::KickoffPB(const std::vector<u8>& input, std::vector<u8>& output)
     IoctlSubmitGpfifo params{};
     std::memcpy(&params, input.data(), sizeof(IoctlSubmitGpfifo));
     LOG_WARNING(Service_NVDRV, "(STUBBED) called, gpfifo={:X}, num_entries={:X}, flags={:X}",
-                params.address, params.num_entries, params.flags);
+                params.address, params.num_entries, params.flags.raw);
 
     Tegra::CommandList entries(params.num_entries);
     Memory::ReadBlock(params.address, entries.data(),
                       params.num_entries * sizeof(Tegra::CommandListHeader));
 
-    Core::System::GetInstance().GPU().PushGPUEntries(std::move(entries));
+    UNIMPLEMENTED_IF(params.flags.add_wait.Value() != 0);
+    UNIMPLEMENTED_IF(params.flags.add_increment.Value() != 0);
+
+    auto& gpu = Core::System::GetInstance().GPU();
+    u32 current_syncpoint_value = gpu.GetSyncpointValue(params.fence_out.id);
+    if (params.flags.increment.Value()) {
+        params.fence_out.value += current_syncpoint_value;
+    } else {
+        params.fence_out.value = current_syncpoint_value;
+    }
+    gpu.PushGPUEntries(std::move(entries));
 
-    // TODO(Blinkhawk): Figure how thoios fence is set
-    // params.fence_out.value = 0;
     std::memcpy(output.data(), &params, output.size());
     return 0;
 }
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_gpu.h
index d95cedb09..0729eeb8d 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.h
@@ -153,7 +153,13 @@ private:
     struct IoctlSubmitGpfifo {
         u64_le address;     // pointer to gpfifo entry structs
         u32_le num_entries; // number of fence objects being submitted
-        u32_le flags;
+        union { // submit flags: the same word viewed either whole or as named bits
+            u32_le raw;                           // the whole flags word
+            BitField<0, 1, u32_le> add_wait;      // append a wait sync_point to the list
+            BitField<1, 1, u32_le> add_increment; // append an increment to the list
+            BitField<2, 1, u32_le> new_hw_format; // Mostly ignored
+            BitField<8, 1, u32_le> increment;     // fence_out.value += current syncpoint value
+        } flags;
         Fence fence_out; // returned new fence object for others to wait on
     };
     static_assert(sizeof(IoctlSubmitGpfifo) == 16 + sizeof(Fence),
diff --git a/src/core/hle/service/nvdrv/nvdata.h b/src/core/hle/service/nvdrv/nvdata.h
index 7e1dce232..fd5f79f36 100644
--- a/src/core/hle/service/nvdrv/nvdata.h
+++ b/src/core/hle/service/nvdrv/nvdata.h
@@ -5,6 +5,8 @@
 
 namespace Service::Nvidia {
 
+constexpr u32 MaxSyncPoints = 192; // number of syncpoint slots; sizes the GPU's per-id tables
+
 struct Fence {
     s32 id;
     u32 value;
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 8755b8af4..224c27bd2 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -346,8 +346,9 @@ void Maxwell3D::ProcessSyncPoint() {
     const u32 sync_point = regs.sync_info.sync_point.Value();
     const u32 increment = regs.sync_info.increment.Value();
     const u32 cache_flush = regs.sync_info.unknown.Value();
-    LOG_DEBUG(HW_GPU, "Syncpoint set {}, increment: {}, unk: {}", sync_point, increment,
-              cache_flush);
+    if (increment) {
+        system.GPU().IncrementSyncPoint(sync_point);
+    }
 }
 
 void Maxwell3D::DrawArrays() {
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 52706505b..1d12f0493 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -66,6 +66,30 @@ const DmaPusher& GPU::DmaPusher() const {
     return *dma_pusher;
 }
 
+/// Bumps a syncpoint and triggers every registered event whose threshold is now reached.
+void GPU::IncrementSyncPoint(const u32 syncpoint_id) {
+    ASSERT(syncpoint_id < syncpoints.size()); // callers pass raw register values
+    // Use the result of the atomic increment itself; a separate load() may see a later value.
+    const u32 value = ++syncpoints[syncpoint_id];
+    auto& waiters = events[syncpoint_id];
+    for (auto it = waiters.begin(); it != waiters.end();) {
+        if (value >= it->value) {
+            TriggerCpuInterrupt(it->event_id);
+            it = waiters.erase(it); // erase() hands back the next valid iterator
+        } else {
+            ++it;
+        }
+    }
+}
+
+u32 GPU::GetSyncpointValue(const u32 syncpoint_id) const { // current counter of one syncpoint
+    return syncpoints[syncpoint_id].load(); // unchecked index: id must be < syncpoints.size()
+}
+
+void GPU::RegisterEvent(const u32 event_id, const u32 syncpoint_id, const u32 value) {
+    // Queued here; IncrementSyncPoint triggers and removes it once the counter is >= value.
+    events[syncpoint_id].emplace_back(event_id, value); // unchecked index, like the getter
+}
+
 u32 RenderTargetBytesPerPixel(RenderTargetFormat format) {
     ASSERT(format != RenderTargetFormat::NONE);
 
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index fe6628923..4c97d6c6f 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -5,8 +5,11 @@
 #pragma once
 
 #include <array>
+#include <atomic>
+#include <list>
 #include <memory>
 #include "common/common_types.h"
+#include "core/hle/service/nvdrv/nvdata.h"
 #include "core/hle/service/nvflinger/buffer_queue.h"
 #include "video_core/dma_pusher.h"
 
@@ -164,6 +167,12 @@ public:
     /// Returns a reference to the GPU DMA pusher.
     Tegra::DmaPusher& DmaPusher();
 
+    void IncrementSyncPoint(const u32 syncpoint_id); ///< Bumps a syncpoint; fires due events.
+    /// Returns the current counter value of a syncpoint.
+    u32 GetSyncpointValue(const u32 syncpoint_id) const;
+    /// Queues event_id to be triggered once the syncpoint counter reaches value.
+    void RegisterEvent(const u32 event_id, const u32 sync_point_id, const u32 value);
+
     /// Returns a const reference to the GPU DMA pusher.
     const Tegra::DmaPusher& DmaPusher() const;
 
@@ -228,6 +237,11 @@ public:
     /// Notify rasterizer that any caches of the specified region should be flushed and invalidated
     virtual void FlushAndInvalidateRegion(CacheAddr addr, u64 size) = 0;
 
+protected:
+    virtual void TriggerCpuInterrupt(const u32 event_id) const {
+        // TODO: implement. Invoked per satisfied event by IncrementSyncPoint; default is a no-op.
+    }
+
 private:
     void ProcessBindMethod(const MethodCall& method_call);
     void ProcessSemaphoreTriggerMethod();
@@ -262,6 +276,16 @@ private:
     std::unique_ptr<Engines::MaxwellDMA> maxwell_dma;
     /// Inline memory engine
     std::unique_ptr<Engines::KeplerMemory> kepler_memory;
+    /// Per-syncpoint counters, indexed by syncpoint id.
+    std::array<std::atomic<u32>, Service::Nvidia::MaxSyncPoints> syncpoints{};
+    /// A pending notification: event_id is triggered once the syncpoint counter reaches value.
+    struct Event {
+        Event(const u32 event_id, const u32 value) : event_id(event_id), value(value) {}
+        u32 event_id; ///< handed to TriggerCpuInterrupt when the event fires
+        u32 value;    ///< threshold compared against the syncpoint counter
+    };
+    /// Events still waiting on each syncpoint, indexed by syncpoint id.
+    std::array<std::list<Event>, Service::Nvidia::MaxSyncPoints> events;
 };
 
 #define ASSERT_REG_POSITION(field_name, position)                                                  \