From 82b829625b89a706dd0d867c529f533fe928710c Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 7 Jun 2019 12:56:30 -0400 Subject: [PATCH] video_core: Implement GPU side Syncpoints --- .../hle/service/nvdrv/devices/nvhost_gpu.cpp | 30 +++++++++++++++---- .../hle/service/nvdrv/devices/nvhost_gpu.h | 8 ++++- src/core/hle/service/nvdrv/nvdata.h | 2 ++ src/video_core/engines/maxwell_3d.cpp | 5 ++-- src/video_core/gpu.cpp | 24 +++++++++++++++ src/video_core/gpu.h | 24 +++++++++++++++ 6 files changed, 84 insertions(+), 9 deletions(-) diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp index 8a53eddb1..9d1107594 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp @@ -143,7 +143,7 @@ u32 nvhost_gpu::SubmitGPFIFO(const std::vector& input, std::vector& outp IoctlSubmitGpfifo params{}; std::memcpy(&params, input.data(), sizeof(IoctlSubmitGpfifo)); LOG_WARNING(Service_NVDRV, "(STUBBED) called, gpfifo={:X}, num_entries={:X}, flags={:X}", - params.address, params.num_entries, params.flags); + params.address, params.num_entries, params.flags.raw); ASSERT_MSG(input.size() == sizeof(IoctlSubmitGpfifo) + params.num_entries * sizeof(Tegra::CommandListHeader), @@ -153,7 +153,17 @@ u32 nvhost_gpu::SubmitGPFIFO(const std::vector& input, std::vector& outp std::memcpy(entries.data(), &input[sizeof(IoctlSubmitGpfifo)], params.num_entries * sizeof(Tegra::CommandListHeader)); - Core::System::GetInstance().GPU().PushGPUEntries(std::move(entries)); + UNIMPLEMENTED_IF(params.flags.add_wait.Value() != 0); + UNIMPLEMENTED_IF(params.flags.add_increment.Value() != 0); + + auto& gpu = Core::System::GetInstance().GPU(); + u32 current_syncpoint_value = gpu.GetSyncpointValue(params.fence_out.id); + if (params.flags.increment.Value()) { + params.fence_out.value += current_syncpoint_value; + } else { + params.fence_out.value = current_syncpoint_value; + } + 
gpu.PushGPUEntries(std::move(entries)); // TODO(Blinkhawk): Figure how thoios fence is set // params.fence_out.value = 0; @@ -168,16 +178,24 @@ u32 nvhost_gpu::KickoffPB(const std::vector& input, std::vector& output) IoctlSubmitGpfifo params{}; std::memcpy(&params, input.data(), sizeof(IoctlSubmitGpfifo)); LOG_WARNING(Service_NVDRV, "(STUBBED) called, gpfifo={:X}, num_entries={:X}, flags={:X}", - params.address, params.num_entries, params.flags); + params.address, params.num_entries, params.flags.raw); Tegra::CommandList entries(params.num_entries); Memory::ReadBlock(params.address, entries.data(), params.num_entries * sizeof(Tegra::CommandListHeader)); - Core::System::GetInstance().GPU().PushGPUEntries(std::move(entries)); + UNIMPLEMENTED_IF(params.flags.add_wait.Value() != 0); + UNIMPLEMENTED_IF(params.flags.add_increment.Value() != 0); + + auto& gpu = Core::System::GetInstance().GPU(); + u32 current_syncpoint_value = gpu.GetSyncpointValue(params.fence_out.id); + if (params.flags.increment.Value()) { + params.fence_out.value += current_syncpoint_value; + } else { + params.fence_out.value = current_syncpoint_value; + } + gpu.PushGPUEntries(std::move(entries)); - // TODO(Blinkhawk): Figure how thoios fence is set - // params.fence_out.value = 0; std::memcpy(output.data(), &params, output.size()); return 0; } diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_gpu.h index d95cedb09..0729eeb8d 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.h +++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.h @@ -153,7 +153,13 @@ private: struct IoctlSubmitGpfifo { u64_le address; // pointer to gpfifo entry structs u32_le num_entries; // number of fence objects being submitted - u32_le flags; + union { + u32_le raw; + BitField<0, 1, u32_le> add_wait; // append a wait sync_point to the list + BitField<1, 1, u32_le> add_increment; // append an increment to the list + BitField<2, 1, u32_le> new_hw_format; // Mostly ignored + 
BitField<8, 1, u32_le> increment; // increment the returned fence + } flags; Fence fence_out; // returned new fence object for others to wait on }; static_assert(sizeof(IoctlSubmitGpfifo) == 16 + sizeof(Fence), diff --git a/src/core/hle/service/nvdrv/nvdata.h b/src/core/hle/service/nvdrv/nvdata.h index 7e1dce232..fd5f79f36 100644 --- a/src/core/hle/service/nvdrv/nvdata.h +++ b/src/core/hle/service/nvdrv/nvdata.h @@ -5,6 +5,8 @@ namespace Service::Nvidia { +constexpr u32 MaxSyncPoints = 192; + struct Fence { s32 id; u32 value; diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 8755b8af4..224c27bd2 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -346,8 +346,9 @@ void Maxwell3D::ProcessSyncPoint() { const u32 sync_point = regs.sync_info.sync_point.Value(); const u32 increment = regs.sync_info.increment.Value(); const u32 cache_flush = regs.sync_info.unknown.Value(); - LOG_DEBUG(HW_GPU, "Syncpoint set {}, increment: {}, unk: {}", sync_point, increment, - cache_flush); + if (increment) { + system.GPU().IncrementSyncPoint(sync_point); + } } void Maxwell3D::DrawArrays() { diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 52706505b..1d12f0493 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -66,6 +66,30 @@ const DmaPusher& GPU::DmaPusher() const { return *dma_pusher; } +void GPU::IncrementSyncPoint(const u32 syncpoint_id) { + syncpoints[syncpoint_id]++; + if (!events[syncpoint_id].empty()) { + u32 value = syncpoints[syncpoint_id].load(); + auto it = events[syncpoint_id].begin(); + while (it != events[syncpoint_id].end()) { + if (value >= it->value) { + TriggerCpuInterrupt(it->event_id); + it = events[syncpoint_id].erase(it); + continue; + } + it++; + } + } +} + +u32 GPU::GetSyncpointValue(const u32 syncpoint_id) const { + return syncpoints[syncpoint_id].load(); +} + +void GPU::RegisterEvent(const u32 event_id, const u32 syncpoint_id, const u32 
value) { + events[syncpoint_id].emplace_back(event_id, value); +} + u32 RenderTargetBytesPerPixel(RenderTargetFormat format) { ASSERT(format != RenderTargetFormat::NONE); diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index fe6628923..4c97d6c6f 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -5,8 +5,11 @@ #pragma once #include +#include +#include #include #include "common/common_types.h" +#include "core/hle/service/nvdrv/nvdata.h" #include "core/hle/service/nvflinger/buffer_queue.h" #include "video_core/dma_pusher.h" @@ -164,6 +167,12 @@ public: /// Returns a reference to the GPU DMA pusher. Tegra::DmaPusher& DmaPusher(); + void IncrementSyncPoint(const u32 syncpoint_id); + + u32 GetSyncpointValue(const u32 syncpoint_id) const; + + void RegisterEvent(const u32 event_id, const u32 sync_point_id, const u32 value); + /// Returns a const reference to the GPU DMA pusher. const Tegra::DmaPusher& DmaPusher() const; @@ -228,6 +237,11 @@ public: /// Notify rasterizer that any caches of the specified region should be flushed and invalidated virtual void FlushAndInvalidateRegion(CacheAddr addr, u64 size) = 0; +protected: + virtual void TriggerCpuInterrupt(const u32 event_id) const { + // Todo implement this + } + private: void ProcessBindMethod(const MethodCall& method_call); void ProcessSemaphoreTriggerMethod(); @@ -262,6 +276,16 @@ private: std::unique_ptr maxwell_dma; /// Inline memory engine std::unique_ptr kepler_memory; + + std::array, Service::Nvidia::MaxSyncPoints> syncpoints{}; + + struct Event { + Event(const u32 event_id, const u32 value) : event_id(event_id), value(value) {} + u32 event_id; + u32 value; + }; + + std::array, Service::Nvidia::MaxSyncPoints> events; }; #define ASSERT_REG_POSITION(field_name, position) \