BufferBase: Don't ignore GPU pages.

2023-01-05 06:43:54 -05:00 · 2023-01-05 06:43:54 -05:00 · b56ad93bbc
parent 2d0c4f2b1d
commit b56ad93bbc
8 changed files with 22 additions and 23 deletions
--- a/src/tests/video_core/buffer_base.cpp
+++ b/src/tests/video_core/buffer_base.cpp
@@ -538,7 +538,7 @@ TEST_CASE("BufferBase: Cached write downloads") {
    int num = 0;
    buffer.ForEachDownloadRangeAndClear(c, WORD, [&](u64 offset, u64 size) { ++num; });
    buffer.ForEachUploadRange(c, WORD, [&](u64 offset, u64 size) { ++num; });
-    REQUIRE(num == 0);
+    REQUIRE(num == 1);
    REQUIRE(!buffer.IsRegionCpuModified(c + PAGE, PAGE));
    REQUIRE(!buffer.IsRegionGpuModified(c + PAGE, PAGE));
    buffer.FlushCachedWrites();
--- a/src/video_core/buffer_cache/buffer_base.h
+++ b/src/video_core/buffer_cache/buffer_base.h
@@ -430,7 +430,7 @@ private:
        if (query_begin >= SizeBytes() || size < 0) {
            return;
        }
-        u64* const untracked_words = Array<Type::Untracked>();
+        [[maybe_unused]] u64* const untracked_words = Array<Type::Untracked>();
        u64* const state_words = Array<type>();
        const u64 query_end = query_begin + std::min(static_cast<u64>(size), SizeBytes());
        u64* const words_begin = state_words + query_begin / BYTES_PER_WORD;
@@ -483,7 +483,7 @@ private:
                NotifyRasterizer<true>(word_index, current_bits, ~u64{0});
            }
            // Exclude CPU modified pages when visiting GPU pages
-            const u64 word = current_word & ~(type == Type::GPU ? untracked_words[word_index] : 0);
+            const u64 word = current_word;
            u64 page = page_begin;
            page_begin = 0;

@@ -531,7 +531,7 @@ private:
    [[nodiscard]] bool IsRegionModified(u64 offset, u64 size) const noexcept {
        static_assert(type != Type::Untracked);

-        const u64* const untracked_words = Array<Type::Untracked>();
+        [[maybe_unused]] const u64* const untracked_words = Array<Type::Untracked>();
        const u64* const state_words = Array<type>();
        const u64 num_query_words = size / BYTES_PER_WORD + 1;
        const u64 word_begin = offset / BYTES_PER_WORD;
@@ -539,8 +539,7 @@ private:
        const u64 page_limit = Common::DivCeil(offset + size, BYTES_PER_PAGE);
        u64 page_index = (offset / BYTES_PER_PAGE) % PAGES_PER_WORD;
        for (u64 word_index = word_begin; word_index < word_end; ++word_index, page_index = 0) {
-            const u64 off_word = type == Type::GPU ? untracked_words[word_index] : 0;
-            const u64 word = state_words[word_index] & ~off_word;
+            const u64 word = state_words[word_index];
            if (word == 0) {
                continue;
            }
@@ -564,7 +563,7 @@ private:
    [[nodiscard]] std::pair<u64, u64> ModifiedRegion(u64 offset, u64 size) const noexcept {
        static_assert(type != Type::Untracked);

-        const u64* const untracked_words = Array<Type::Untracked>();
+        [[maybe_unused]] const u64* const untracked_words = Array<Type::Untracked>();
        const u64* const state_words = Array<type>();
        const u64 num_query_words = size / BYTES_PER_WORD + 1;
        const u64 word_begin = offset / BYTES_PER_WORD;
@@ -574,8 +573,7 @@ private:
        u64 begin = std::numeric_limits<u64>::max();
        u64 end = 0;
        for (u64 word_index = word_begin; word_index < word_end; ++word_index) {
-            const u64 off_word = type == Type::GPU ? untracked_words[word_index] : 0;
-            const u64 word = state_words[word_index] & ~off_word;
+            const u64 word = state_words[word_index];
            if (word == 0) {
                continue;
            }
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -123,7 +123,7 @@ void MaxwellDMA::Launch() {
                        convert_linear_2_blocklinear_addr(regs.offset_in + offset),
                        tmp_buffer.data(), tmp_buffer.size());
                    memory_manager.WriteBlockCached(regs.offset_out + offset, tmp_buffer.data(),
-                                              tmp_buffer.size());
+                                                    tmp_buffer.size());
                }
            } else if (is_src_pitch && !is_dst_pitch) {
                UNIMPLEMENTED_IF(regs.line_length_in % 16 != 0);
@@ -143,7 +143,7 @@ void MaxwellDMA::Launch() {
                    memory_manager.ReadBlockUnsafe(regs.offset_in, tmp_buffer.data(),
                                                   regs.line_length_in);
                    memory_manager.WriteBlockCached(regs.offset_out, tmp_buffer.data(),
-                                              regs.line_length_in);
+                                                    regs.line_length_in);
                }
            }
        }
--- a/src/video_core/invalidation_accumulator.h
+++ b/src/video_core/invalidation_accumulator.h
@@ -3,6 +3,7 @@

 #pragma once

+#include <utility>
 #include <vector>

 #include "common/common_types.h"
@@ -26,8 +27,8 @@ public:
        if (address >= start_address && address + size <= last_collection) [[likely]] {
            return;
        }
-        size = (address + size + atomicy_side_mask) & atomicy_mask - address;
-        address = address & atomicy_mask;
+        size = ((address + size + atomicity_size_mask) & atomicity_mask) - address;
+        address = address & atomicity_mask;
        if (!has_collected) [[unlikely]] {
            reset_values();
            has_collected = true;
@@ -64,10 +65,10 @@ public:
    }

 private:
-    static constexpr size_t atomicy_bits = 5;
-    static constexpr size_t atomicy_size = 1ULL << atomicy_bits;
-    static constexpr size_t atomicy_side_mask = atomicy_size - 1;
-    static constexpr size_t atomicy_mask = ~atomicy_side_mask;
+    static constexpr size_t atomicity_bits = 5;
+    static constexpr size_t atomicity_size = 1ULL << atomicity_bits;
+    static constexpr size_t atomicity_size_mask = atomicity_size - 1;
+    static constexpr size_t atomicity_mask = ~atomicity_size_mask;
    GPUVAddr start_address{};
    GPUVAddr last_collection{};
    size_t accumulated_size{};
--- a/src/video_core/memory_manager.cpp
+++ b/src/video_core/memory_manager.cpp
@@ -46,7 +46,7 @@ MemoryManager::MemoryManager(Core::System& system_, u64 address_space_bits_, u64
    big_page_table_cpu.resize(big_page_table_size);
    big_page_continous.resize(big_page_table_size / continous_bits, 0);
    entries.resize(page_table_size / 32, 0);
-    if (!Settings::IsGPULevelExtreme()) {
+    if (!Settings::IsGPULevelExtreme() && Settings::IsFastmemEnabled()) {
        fastmem_arena = system.DeviceMemory().buffer.VirtualBasePointer();
    } else {
        fastmem_arena = nullptr;
--- a/src/video_core/memory_manager.h
+++ b/src/video_core/memory_manager.h
@@ -107,7 +107,7 @@ public:
     * will be returned;
     */
    std::vector<std::pair<GPUVAddr, std::size_t>> GetSubmappedRange(GPUVAddr gpu_addr,
-                                                                 std::size_t size) const;
+                                                                    std::size_t size) const;

    GPUVAddr Map(GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size,
                 PTEKind kind = PTEKind::INVALID, bool is_big_pages = true);
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -97,7 +97,7 @@ public:
                                  VideoCommon::CacheType which = VideoCommon::CacheType::All) = 0;

    virtual void InnerInvalidation(std::span<const std::pair<VAddr, std::size_t>> sequences) {
-        for (const auto [cpu_addr, size] : sequences) {
+        for (const auto& [cpu_addr, size] : sequences) {
            InvalidateRegion(cpu_addr, size);
        }
    }
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -486,18 +486,18 @@ void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size, VideoCommon::Cache
 void RasterizerVulkan::InnerInvalidation(std::span<const std::pair<VAddr, std::size_t>> sequences) {
    {
        std::scoped_lock lock{texture_cache.mutex};
-        for (const auto [addr, size] : sequences) {
+        for (const auto& [addr, size] : sequences) {
            texture_cache.WriteMemory(addr, size);
        }
    }
    {
        std::scoped_lock lock{buffer_cache.mutex};
-        for (const auto [addr, size] : sequences) {
+        for (const auto& [addr, size] : sequences) {
            buffer_cache.WriteMemory(addr, size);
        }
    }
    {
-        for (const auto [addr, size] : sequences) {
+        for (const auto& [addr, size] : sequences) {
            query_cache.InvalidateRegion(addr, size);
            pipeline_cache.InvalidateRegion(addr, size);
        }