From da440da9f54cc860f3c69da685a415d5ec9d7b64 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Wed, 28 Jun 2023 19:32:50 +0200
Subject: [PATCH] Memory Tracking: Optimize tracking to only use atomic writes
 when contested with the host GPU

---
 src/core/core.cpp                             |  8 +++-
 src/core/gpu_dirty_memory_manager.h           | 14 +++++--
 src/core/memory.cpp                           | 39 ++++++++++++++++---
 src/core/memory.h                             |  6 ++-
 src/video_core/buffer_cache/buffer_cache.h    | 15 ++++++-
 .../buffer_cache/buffer_cache_base.h          |  5 ++-
 src/video_core/fence_manager.h                |  2 +-
 src/video_core/gpu.cpp                        | 10 ++++-
 src/video_core/gpu.h                          |  4 ++
 src/video_core/gpu_thread.cpp                 |  6 +--
 src/video_core/rasterizer_interface.h         |  4 +-
 .../renderer_null/null_rasterizer.cpp         |  5 ++-
 .../renderer_null/null_rasterizer.h           |  3 +-
 .../renderer_opengl/gl_rasterizer.cpp         | 35 ++++++++++++-----
 .../renderer_opengl/gl_rasterizer.h           |  3 +-
 .../renderer_vulkan/vk_rasterizer.cpp         | 25 +++++++++++-
 .../renderer_vulkan/vk_rasterizer.h           |  3 +-
 src/video_core/shader_cache.cpp               |  2 +-
 src/video_core/shader_cache.h                 |  2 +-
 19 files changed, 153 insertions(+), 38 deletions(-)

diff --git a/src/core/core.cpp b/src/core/core.cpp
index deefeb301..9e3eb3795 100644
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -55,7 +55,6 @@
 #include "video_core/renderer_base.h"
 #include "video_core/video_core.h"
 
-
 MICROPROFILE_DEFINE(ARM_CPU0, "ARM", "CPU 0", MP_RGB(255, 64, 64));
 MICROPROFILE_DEFINE(ARM_CPU1, "ARM", "CPU 1", MP_RGB(255, 64, 64));
 MICROPROFILE_DEFINE(ARM_CPU2, "ARM", "CPU 2", MP_RGB(255, 64, 64));
@@ -132,7 +131,10 @@ FileSys::VirtualFile GetGameFileFromPath(const FileSys::VirtualFilesystem& vfs,
 struct System::Impl {
     explicit Impl(System& system)
         : kernel{system}, fs_controller{system}, memory{system}, hid_core{}, room_network{},
-          cpu_manager{system}, reporter{system}, applet_manager{system}, time_manager{system} {}
+          cpu_manager{system}, reporter{system}, applet_manager{system}, time_manager{system},
+          gpu_dirty_memory_write_manager{} {
+        memory.SetGPUDirtyManagers(gpu_dirty_memory_write_manager);
+    }
 
     void Initialize(System& system) {
         device_memory = std::make_unique<Core::DeviceMemory>();
@@ -236,6 +238,8 @@ struct System::Impl {
         // Setting changes may require a full system reinitialization (e.g., disabling multicore).
         ReinitializeIfNecessary(system);
 
+        memory.SetGPUDirtyManagers(gpu_dirty_memory_write_manager);
+
         kernel.Initialize();
         cpu_manager.Initialize();
 
diff --git a/src/core/gpu_dirty_memory_manager.h b/src/core/gpu_dirty_memory_manager.h
index 9c3d41d11..789b7530f 100644
--- a/src/core/gpu_dirty_memory_manager.h
+++ b/src/core/gpu_dirty_memory_manager.h
@@ -1,3 +1,6 @@
+// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
+
 #pragma once
 
 #include <atomic>
@@ -59,8 +62,7 @@ public:
                 mask = mask >> empty_bits;
 
                 const size_t continuous_bits = std::countr_one(mask);
-                callback((transform.address << Memory::YUZU_PAGEBITS) + offset,
-                         continuous_bits << align_bits);
+                callback((transform.address << page_bits) + offset, continuous_bits << align_bits);
                 mask = continuous_bits < align_size ? (mask >> continuous_bits) : 0;
                 offset += continuous_bits << align_bits;
             }
@@ -74,6 +76,10 @@ private:
         u64 mask;
     };
 
+    constexpr static size_t page_bits = Memory::YUZU_PAGEBITS;
+    constexpr static size_t page_size = 1ULL << page_bits;
+    constexpr static size_t page_mask = page_size - 1;
+
     constexpr static size_t align_bits = 6U;
     constexpr static size_t align_size = 1U << align_bits;
     constexpr static size_t align_mask = align_size - 1;
@@ -94,11 +100,11 @@ private:
     }
 
     TransformAddress BuildTransform(VAddr address, size_t size) {
-        const size_t minor_address = address & Memory::YUZU_PAGEMASK;
+        const size_t minor_address = address & page_mask;
         const size_t minor_bit = minor_address >> align_bits;
         const size_t top_bit = (minor_address + size + align_mask) >> align_bits;
         TransformAddress result{};
-        result.address = address >> Memory::YUZU_PAGEBITS;
+        result.address = address >> page_bits;
         result.mask = CreateMask<u64>(top_bit, minor_bit);
         return result;
     }
diff --git a/src/core/memory.cpp b/src/core/memory.cpp
index 60b246bdd..257406f09 100644
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -3,6 +3,7 @@
 
 #include <algorithm>
 #include <cstring>
+#include <span>
 
 #include "common/assert.h"
 #include "common/atomic_ops.h"
@@ -679,7 +680,7 @@ struct Memory::Impl {
                 LOG_ERROR(HW_Memory, "Unmapped Write{} @ 0x{:016X} = 0x{:016X}", sizeof(T) * 8,
                           GetInteger(vaddr), static_cast<u64>(data));
             },
-            [&]() { system.CurrentGPUDirtyMemoryManager().Collect(GetInteger(vaddr), sizeof(T)); });
+            [&]() { HandleRasterizerWrite(GetInteger(vaddr), sizeof(T)); });
         if (ptr) {
             std::memcpy(ptr, &data, sizeof(T));
         }
@@ -693,7 +694,7 @@ struct Memory::Impl {
                 LOG_ERROR(HW_Memory, "Unmapped WriteExclusive{} @ 0x{:016X} = 0x{:016X}",
                           sizeof(T) * 8, GetInteger(vaddr), static_cast<u64>(data));
             },
-            [&]() { system.CurrentGPUDirtyMemoryManager().Collect(GetInteger(vaddr), sizeof(T)); });
+            [&]() { HandleRasterizerWrite(GetInteger(vaddr), sizeof(T)); });
         if (ptr) {
             const auto volatile_pointer = reinterpret_cast<volatile T*>(ptr);
             return Common::AtomicCompareAndSwap(volatile_pointer, data, expected);
@@ -708,7 +709,7 @@ struct Memory::Impl {
                 LOG_ERROR(HW_Memory, "Unmapped WriteExclusive128 @ 0x{:016X} = 0x{:016X}{:016X}",
                           GetInteger(vaddr), static_cast<u64>(data[1]), static_cast<u64>(data[0]));
             },
-            [&]() { system.CurrentGPUDirtyMemoryManager().Collect(GetInteger(vaddr), sizeof(u128)); });
+            [&]() { HandleRasterizerWrite(GetInteger(vaddr), sizeof(u128)); });
         if (ptr) {
             const auto volatile_pointer = reinterpret_cast<volatile u64*>(ptr);
             return Common::AtomicCompareAndSwap(volatile_pointer, data, expected);
@@ -718,7 +719,7 @@ struct Memory::Impl {
 
     void HandleRasterizerDownload(VAddr address, size_t size) {
         const size_t core = system.GetCurrentHostThreadID();
-        auto& current_area = rasterizer_areas[core];
+        auto& current_area = rasterizer_read_areas[core];
         const VAddr end_address = address + size;
         if (current_area.start_address <= address && end_address <= current_area.end_address)
             [[likely]] {
@@ -727,9 +728,31 @@ struct Memory::Impl {
         current_area = system.GPU().OnCPURead(address, size);
     }
 
-    Common::PageTable* current_page_table = nullptr;
-    std::array<VideoCore::RasterizerDownloadArea, Core::Hardware::NUM_CPU_CORES> rasterizer_areas{};
+    void HandleRasterizerWrite(VAddr address, size_t size) { // Tracks a CPU write if the GPU asks.
+        const size_t core = system.GetCurrentHostThreadID(); // NOTE(review): verify < NUM_CPU_CORES
+        auto& current_area = rasterizer_write_areas[core];
+        VAddr subaddress = address >> YUZU_PAGEBITS; // page index of the write
+        bool do_collection = current_area.last_address == subaddress; // same page as last hit
+        if (!do_collection) [[unlikely]] {
+            do_collection = system.GPU().OnCPUWrite(address, size); // new page: query the GPU
+            if (!do_collection) {
+                return; // GPU returned false: nothing to collect
+            }
+            current_area.last_address = subaddress; // cache page for the fast path
+        }
+        gpu_dirty_managers[core].Collect(address, size); // per-core manager
+    }
+
+    struct GPUDirtyState { // per-core cache used by HandleRasterizerWrite
+        VAddr last_address; // page index of the last write for which OnCPUWrite returned true
+    };
+
     Core::System& system;
+    Common::PageTable* current_page_table = nullptr;
+    std::array<VideoCore::RasterizerDownloadArea, Core::Hardware::NUM_CPU_CORES>
+        rasterizer_read_areas{};
+    std::array<GPUDirtyState, Core::Hardware::NUM_CPU_CORES> rasterizer_write_areas{};
+    std::span<Core::GPUDirtyMemoryManager> gpu_dirty_managers;
 };
 
 Memory::Memory(Core::System& system_) : system{system_} {
@@ -877,6 +900,10 @@ void Memory::ZeroBlock(Common::ProcessAddress dest_addr, const std::size_t size)
     impl->ZeroBlock(*system.ApplicationProcess(), dest_addr, size);
 }
 
+void Memory::SetGPUDirtyManagers(std::span<Core::GPUDirtyMemoryManager> managers) {
+    impl->gpu_dirty_managers = managers; // stores the view only; managers are not copied
+}
+
 Result Memory::InvalidateDataCache(Common::ProcessAddress dest_addr, const std::size_t size) {
     return impl->InvalidateDataCache(*system.ApplicationProcess(), dest_addr, size);
 }
diff --git a/src/core/memory.h b/src/core/memory.h
index 72a0be813..ea01824f8 100644
--- a/src/core/memory.h
+++ b/src/core/memory.h
@@ -5,6 +5,7 @@
 
 #include <cstddef>
 #include <memory>
+#include <span>
 #include <string>
 #include "common/typed_address.h"
 #include "core/hle/result.h"
@@ -15,7 +16,8 @@ struct PageTable;
 
 namespace Core {
 class System;
-}
+class GPUDirtyMemoryManager;
+} // namespace Core
 
 namespace Kernel {
 class PhysicalMemory;
@@ -458,6 +460,8 @@ public:
      */
     void MarkRegionDebug(Common::ProcessAddress vaddr, u64 size, bool debug);
 
+    void SetGPUDirtyManagers(std::span<Core::GPUDirtyMemoryManager> managers);
+
 private:
     Core::System& system;
 
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 9239ad862..b5ed3380f 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -132,6 +132,19 @@ void BufferCache<P>::CachedWriteMemory(VAddr cpu_addr, u64 size) {
     InlineMemoryImplementation(cpu_addr, size, tmp_buffer);
 }
 
+template <class P>
+bool BufferCache<P>::OnCPUWrite(VAddr cpu_addr, u64 size) { // true only if GPU-modified
+    const bool is_dirty = IsRegionRegistered(cpu_addr, size); // true when registered
+    if (!is_dirty) {
+        return false; // range is not registered: nothing to do
+    }
+    if (memory_tracker.IsRegionGpuModified(cpu_addr, size)) {
+        return true; // registered and GPU-modified: WriteMemory is skipped on this path
+    }
+    WriteMemory(cpu_addr, size); // registered but not GPU-modified
+    return false;
+}
+
 template <class P>
 std::optional<VideoCore::RasterizerDownloadArea> BufferCache<P>::GetFlushArea(VAddr cpu_addr,
                                                                               u64 size) {
@@ -1574,7 +1587,7 @@ bool BufferCache<P>::InlineMemory(VAddr dest_address, size_t copy_size,
 
 template <class P>
 void BufferCache<P>::InlineMemoryImplementation(VAddr dest_address, size_t copy_size,
-                                  std::span<const u8> inlined_buffer) {
+                                                std::span<const u8> inlined_buffer) {
     const IntervalType subtract_interval{dest_address, dest_address + copy_size};
     ClearDownload(subtract_interval);
     common_ranges.subtract(subtract_interval);
diff --git a/src/video_core/buffer_cache/buffer_cache_base.h b/src/video_core/buffer_cache/buffer_cache_base.h
index 4d9bab7f7..460fc7551 100644
--- a/src/video_core/buffer_cache/buffer_cache_base.h
+++ b/src/video_core/buffer_cache/buffer_cache_base.h
@@ -245,6 +245,8 @@ public:
 
     void CachedWriteMemory(VAddr cpu_addr, u64 size);
 
+    bool OnCPUWrite(VAddr cpu_addr, u64 size);
+
     void DownloadMemory(VAddr cpu_addr, u64 size);
 
     std::optional<VideoCore::RasterizerDownloadArea> GetFlushArea(VAddr cpu_addr, u64 size);
@@ -543,7 +545,8 @@ private:
 
     void ClearDownload(IntervalType subtract_interval);
 
-    void InlineMemoryImplementation(VAddr dest_address, size_t copy_size, std::span<const u8> inlined_buffer);
+    void InlineMemoryImplementation(VAddr dest_address, size_t copy_size,
+                                    std::span<const u8> inlined_buffer);
 
     VideoCore::RasterizerInterface& rasterizer;
     Core::Memory::Memory& cpu_memory;
diff --git a/src/video_core/fence_manager.h b/src/video_core/fence_manager.h
index 35d699bbf..ab20ff30f 100644
--- a/src/video_core/fence_manager.h
+++ b/src/video_core/fence_manager.h
@@ -69,7 +69,6 @@ public:
     }
 
     void SignalFence(std::function<void()>&& func) {
-        rasterizer.InvalidateGPUCache();
         bool delay_fence = Settings::IsGPULevelHigh();
         if constexpr (!can_async_check) {
             TryReleasePendingFences<false>();
@@ -96,6 +95,7 @@ public:
             guard.unlock();
             cv.notify_all();
         }
+        rasterizer.InvalidateGPUCache();
     }
 
     void SignalSyncPoint(u32 value) {
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index f823a1e2b..c192e33b2 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -96,7 +96,7 @@ struct GPU::Impl {
     /// Synchronizes CPU writes with Host GPU memory.
     void InvalidateGPUCache() {
         std::function<void(VAddr, size_t)> callback_writes(
-            [this](VAddr address, size_t size) { rasterizer->OnCPUWrite(address, size); });
+            [this](VAddr address, size_t size) { rasterizer->OnCacheInvalidation(address, size); });
         system.GatherGPUDirtyMemory(callback_writes);
     }
 
@@ -301,6 +301,10 @@ struct GPU::Impl {
         gpu_thread.InvalidateRegion(addr, size);
     }
 
+    bool OnCPUWrite(VAddr addr, u64 size) {
+        return rasterizer->OnCPUWrite(addr, size); // forwards straight to the rasterizer
+    }
+
     /// Notify rasterizer that any caches of the specified region should be flushed and invalidated
     void FlushAndInvalidateRegion(VAddr addr, u64 size) {
         gpu_thread.FlushAndInvalidateRegion(addr, size);
@@ -563,6 +567,10 @@ void GPU::InvalidateRegion(VAddr addr, u64 size) {
     impl->InvalidateRegion(addr, size);
 }
 
+bool GPU::OnCPUWrite(VAddr addr, u64 size) {
+    return impl->OnCPUWrite(addr, size); // forwards to GPU::Impl
+}
+
 void GPU::FlushAndInvalidateRegion(VAddr addr, u64 size) {
     impl->FlushAndInvalidateRegion(addr, size);
 }
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index e49c40cf2..ba2838b89 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -250,6 +250,10 @@ public:
     /// Notify rasterizer that any caches of the specified region should be invalidated
     void InvalidateRegion(VAddr addr, u64 size);
 
+    /// Notify rasterizer that the CPU is trying to write this area. Returns true if the area is
+    /// GPU-sensitive (the write must be tracked), false otherwise.
+    bool OnCPUWrite(VAddr addr, u64 size);
+
     /// Notify rasterizer that any caches of the specified region should be flushed and invalidated
     void FlushAndInvalidateRegion(VAddr addr, u64 size);
 
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp
index 889144f38..2f0f9f593 100644
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@@ -47,7 +47,7 @@ static void RunThread(std::stop_token stop_token, Core::System& system,
         } else if (const auto* flush = std::get_if<FlushRegionCommand>(&next.data)) {
             rasterizer->FlushRegion(flush->addr, flush->size);
         } else if (const auto* invalidate = std::get_if<InvalidateRegionCommand>(&next.data)) {
-            rasterizer->OnCPUWrite(invalidate->addr, invalidate->size);
+            rasterizer->OnCacheInvalidation(invalidate->addr, invalidate->size);
         } else {
             ASSERT(false);
         }
@@ -102,12 +102,12 @@ void ThreadManager::TickGPU() {
 }
 
 void ThreadManager::InvalidateRegion(VAddr addr, u64 size) {
-    rasterizer->OnCPUWrite(addr, size);
+    rasterizer->OnCacheInvalidation(addr, size);
 }
 
 void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) {
     // Skip flush on asynch mode, as FlushAndInvalidateRegion is not used for anything too important
-    rasterizer->OnCPUWrite(addr, size);
+    rasterizer->OnCacheInvalidation(addr, size);
 }
 
 u64 ThreadManager::PushCommand(CommandData&& command_data, bool block) {
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index 7566a8c4e..cb8029a4f 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -109,7 +109,9 @@ public:
     }
 
     /// Notify rasterizer that any caches of the specified region are desync with guest
-    virtual void OnCPUWrite(VAddr addr, u64 size) = 0;
+    virtual void OnCacheInvalidation(VAddr addr, u64 size) = 0;
+
+    virtual bool OnCPUWrite(VAddr addr, u64 size) = 0;
 
     /// Sync memory between guest and host.
     virtual void InvalidateGPUCache() = 0;
diff --git a/src/video_core/renderer_null/null_rasterizer.cpp b/src/video_core/renderer_null/null_rasterizer.cpp
index bf2ce4c49..92ecf6682 100644
--- a/src/video_core/renderer_null/null_rasterizer.cpp
+++ b/src/video_core/renderer_null/null_rasterizer.cpp
@@ -47,7 +47,10 @@ bool RasterizerNull::MustFlushRegion(VAddr addr, u64 size, VideoCommon::CacheTyp
     return false;
 }
 void RasterizerNull::InvalidateRegion(VAddr addr, u64 size, VideoCommon::CacheType) {}
-void RasterizerNull::OnCPUWrite(VAddr addr, u64 size) {}
+bool RasterizerNull::OnCPUWrite(VAddr addr, u64 size) {
+    return false; // null renderer: always reports false
+}
+void RasterizerNull::OnCacheInvalidation(VAddr addr, u64 size) {}
 VideoCore::RasterizerDownloadArea RasterizerNull::GetFlushArea(VAddr addr, u64 size) {
     VideoCore::RasterizerDownloadArea new_area{
         .start_address = Common::AlignDown(addr, Core::Memory::YUZU_PAGESIZE),
diff --git a/src/video_core/renderer_null/null_rasterizer.h b/src/video_core/renderer_null/null_rasterizer.h
index a8d35d2c1..93b9a6971 100644
--- a/src/video_core/renderer_null/null_rasterizer.h
+++ b/src/video_core/renderer_null/null_rasterizer.h
@@ -53,7 +53,8 @@ public:
                          VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
     void InvalidateRegion(VAddr addr, u64 size,
                           VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
-    void OnCPUWrite(VAddr addr, u64 size) override;
+    void OnCacheInvalidation(VAddr addr, u64 size) override;
+    bool OnCPUWrite(VAddr addr, u64 size) override;
     VideoCore::RasterizerDownloadArea GetFlushArea(VAddr addr, u64 size) override;
     void InvalidateGPUCache() override;
     void UnmapMemory(VAddr addr, u64 size) override;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index edf527f2d..aadd6967c 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -485,12 +485,33 @@ void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size, VideoCommon::Cache
     }
 }
 
-void RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) {
+bool RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) { // true only via buffer_cache
+    MICROPROFILE_SCOPE(OpenGL_CacheManagement);
+    if (addr == 0 || size == 0) {
+        return false; // null address or empty range: nothing to do
+    }
+
+    {
+        std::scoped_lock lock{buffer_cache.mutex};
+        if (buffer_cache.OnCPUWrite(addr, size)) {
+            return true; // early out: texture and shader caches are not touched here
+        }
+    }
+
+    {
+        std::scoped_lock lock{texture_cache.mutex};
+        texture_cache.WriteMemory(addr, size);
+    }
+
+    shader_cache.InvalidateRegion(addr, size);
+    return false; // texture_cache and shader_cache were notified above
+}
+
+void RasterizerOpenGL::OnCacheInvalidation(VAddr addr, u64 size) {
     MICROPROFILE_SCOPE(OpenGL_CacheManagement);
     if (addr == 0 || size == 0) {
         return;
     }
-    shader_cache.OnCPUWrite(addr, size);
     {
         std::scoped_lock lock{texture_cache.mutex};
         texture_cache.WriteMemory(addr, size);
@@ -499,15 +520,11 @@ void RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) {
         std::scoped_lock lock{buffer_cache.mutex};
         buffer_cache.CachedWriteMemory(addr, size);
     }
+    shader_cache.InvalidateRegion(addr, size);
 }
 
 void RasterizerOpenGL::InvalidateGPUCache() {
-    MICROPROFILE_SCOPE(OpenGL_CacheManagement);
-    shader_cache.SyncGuestHost();
-    {
-        std::scoped_lock lock{buffer_cache.mutex};
-        buffer_cache.FlushCachedWrites();
-    }
+    gpu.InvalidateGPUCache();
 }
 
 void RasterizerOpenGL::UnmapMemory(VAddr addr, u64 size) {
@@ -519,7 +536,7 @@ void RasterizerOpenGL::UnmapMemory(VAddr addr, u64 size) {
         std::scoped_lock lock{buffer_cache.mutex};
         buffer_cache.WriteMemory(addr, size);
     }
-    shader_cache.OnCPUWrite(addr, size);
+    shader_cache.OnCacheInvalidation(addr, size);
 }
 
 void RasterizerOpenGL::ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) {
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index a73ad15c1..8eda2ddba 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -98,7 +98,8 @@ public:
     VideoCore::RasterizerDownloadArea GetFlushArea(VAddr addr, u64 size) override;
     void InvalidateRegion(VAddr addr, u64 size,
                           VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
-    void OnCPUWrite(VAddr addr, u64 size) override;
+    void OnCacheInvalidation(VAddr addr, u64 size) override;
+    bool OnCPUWrite(VAddr addr, u64 size) override;
     void InvalidateGPUCache() override;
     void UnmapMemory(VAddr addr, u64 size) override;
     void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) override;
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index a63a29e61..456bb040e 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -566,7 +566,28 @@ void RasterizerVulkan::InnerInvalidation(std::span<const std::pair<VAddr, std::s
     }
 }
 
-void RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) {
+bool RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) { // true only via buffer_cache
+    if (addr == 0 || size == 0) {
+        return false; // null address or empty range: nothing to do
+    }
+
+    {
+        std::scoped_lock lock{buffer_cache.mutex};
+        if (buffer_cache.OnCPUWrite(addr, size)) {
+            return true; // early out: texture and pipeline caches are not touched here
+        }
+    }
+
+    {
+        std::scoped_lock lock{texture_cache.mutex};
+        texture_cache.WriteMemory(addr, size);
+    }
+
+    pipeline_cache.InvalidateRegion(addr, size);
+    return false; // texture_cache and pipeline_cache were notified above
+}
+
+void RasterizerVulkan::OnCacheInvalidation(VAddr addr, u64 size) {
     if (addr == 0 || size == 0) {
         return;
     }
@@ -595,7 +616,7 @@ void RasterizerVulkan::UnmapMemory(VAddr addr, u64 size) {
         std::scoped_lock lock{buffer_cache.mutex};
         buffer_cache.WriteMemory(addr, size);
     }
-    pipeline_cache.OnCPUWrite(addr, size);
+    pipeline_cache.OnCacheInvalidation(addr, size);
 }
 
 void RasterizerVulkan::ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) {
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h
index b39710b3c..73257d964 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.h
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.h
@@ -96,7 +96,8 @@ public:
     void InvalidateRegion(VAddr addr, u64 size,
                           VideoCommon::CacheType which = VideoCommon::CacheType::All) override;
     void InnerInvalidation(std::span<const std::pair<VAddr, std::size_t>> sequences) override;
-    void OnCPUWrite(VAddr addr, u64 size) override;
+    void OnCacheInvalidation(VAddr addr, u64 size) override;
+    bool OnCPUWrite(VAddr addr, u64 size) override;
     void InvalidateGPUCache() override;
     void UnmapMemory(VAddr addr, u64 size) override;
     void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) override;
diff --git a/src/video_core/shader_cache.cpp b/src/video_core/shader_cache.cpp
index 4db948b6d..01701201d 100644
--- a/src/video_core/shader_cache.cpp
+++ b/src/video_core/shader_cache.cpp
@@ -24,7 +24,7 @@ void ShaderCache::InvalidateRegion(VAddr addr, size_t size) {
     RemovePendingShaders();
 }
 
-void ShaderCache::OnCPUWrite(VAddr addr, size_t size) {
+void ShaderCache::OnCacheInvalidation(VAddr addr, size_t size) {
     std::scoped_lock lock{invalidation_mutex};
     InvalidatePagesInRegion(addr, size);
 }
diff --git a/src/video_core/shader_cache.h b/src/video_core/shader_cache.h
index f3cc4c70b..de8e08002 100644
--- a/src/video_core/shader_cache.h
+++ b/src/video_core/shader_cache.h
@@ -62,7 +62,7 @@ public:
     /// @brief Unmarks a memory region as cached and marks it for removal
     /// @param addr Start address of the CPU write operation
     /// @param size Number of bytes of the CPU write operation
-    void OnCPUWrite(VAddr addr, size_t size);
+    void OnCacheInvalidation(VAddr addr, size_t size);
 
     /// @brief Flushes delayed removal operations
     void SyncGuestHost();