BufferBase: Don't ignore GPU pages.
This commit is contained in:
parent
2d0c4f2b1d
commit
b56ad93bbc
|
@ -538,7 +538,7 @@ TEST_CASE("BufferBase: Cached write downloads") {
|
||||||
int num = 0;
|
int num = 0;
|
||||||
buffer.ForEachDownloadRangeAndClear(c, WORD, [&](u64 offset, u64 size) { ++num; });
|
buffer.ForEachDownloadRangeAndClear(c, WORD, [&](u64 offset, u64 size) { ++num; });
|
||||||
buffer.ForEachUploadRange(c, WORD, [&](u64 offset, u64 size) { ++num; });
|
buffer.ForEachUploadRange(c, WORD, [&](u64 offset, u64 size) { ++num; });
|
||||||
REQUIRE(num == 0);
|
REQUIRE(num == 1);
|
||||||
REQUIRE(!buffer.IsRegionCpuModified(c + PAGE, PAGE));
|
REQUIRE(!buffer.IsRegionCpuModified(c + PAGE, PAGE));
|
||||||
REQUIRE(!buffer.IsRegionGpuModified(c + PAGE, PAGE));
|
REQUIRE(!buffer.IsRegionGpuModified(c + PAGE, PAGE));
|
||||||
buffer.FlushCachedWrites();
|
buffer.FlushCachedWrites();
|
||||||
|
|
|
@ -430,7 +430,7 @@ private:
|
||||||
if (query_begin >= SizeBytes() || size < 0) {
|
if (query_begin >= SizeBytes() || size < 0) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
u64* const untracked_words = Array<Type::Untracked>();
|
[[maybe_unused]] u64* const untracked_words = Array<Type::Untracked>();
|
||||||
u64* const state_words = Array<type>();
|
u64* const state_words = Array<type>();
|
||||||
const u64 query_end = query_begin + std::min(static_cast<u64>(size), SizeBytes());
|
const u64 query_end = query_begin + std::min(static_cast<u64>(size), SizeBytes());
|
||||||
u64* const words_begin = state_words + query_begin / BYTES_PER_WORD;
|
u64* const words_begin = state_words + query_begin / BYTES_PER_WORD;
|
||||||
|
@ -483,7 +483,7 @@ private:
|
||||||
NotifyRasterizer<true>(word_index, current_bits, ~u64{0});
|
NotifyRasterizer<true>(word_index, current_bits, ~u64{0});
|
||||||
}
|
}
|
||||||
// Exclude CPU modified pages when visiting GPU pages
|
// Exclude CPU modified pages when visiting GPU pages
|
||||||
const u64 word = current_word & ~(type == Type::GPU ? untracked_words[word_index] : 0);
|
const u64 word = current_word;
|
||||||
u64 page = page_begin;
|
u64 page = page_begin;
|
||||||
page_begin = 0;
|
page_begin = 0;
|
||||||
|
|
||||||
|
@ -531,7 +531,7 @@ private:
|
||||||
[[nodiscard]] bool IsRegionModified(u64 offset, u64 size) const noexcept {
|
[[nodiscard]] bool IsRegionModified(u64 offset, u64 size) const noexcept {
|
||||||
static_assert(type != Type::Untracked);
|
static_assert(type != Type::Untracked);
|
||||||
|
|
||||||
const u64* const untracked_words = Array<Type::Untracked>();
|
[[maybe_unused]] const u64* const untracked_words = Array<Type::Untracked>();
|
||||||
const u64* const state_words = Array<type>();
|
const u64* const state_words = Array<type>();
|
||||||
const u64 num_query_words = size / BYTES_PER_WORD + 1;
|
const u64 num_query_words = size / BYTES_PER_WORD + 1;
|
||||||
const u64 word_begin = offset / BYTES_PER_WORD;
|
const u64 word_begin = offset / BYTES_PER_WORD;
|
||||||
|
@ -539,8 +539,7 @@ private:
|
||||||
const u64 page_limit = Common::DivCeil(offset + size, BYTES_PER_PAGE);
|
const u64 page_limit = Common::DivCeil(offset + size, BYTES_PER_PAGE);
|
||||||
u64 page_index = (offset / BYTES_PER_PAGE) % PAGES_PER_WORD;
|
u64 page_index = (offset / BYTES_PER_PAGE) % PAGES_PER_WORD;
|
||||||
for (u64 word_index = word_begin; word_index < word_end; ++word_index, page_index = 0) {
|
for (u64 word_index = word_begin; word_index < word_end; ++word_index, page_index = 0) {
|
||||||
const u64 off_word = type == Type::GPU ? untracked_words[word_index] : 0;
|
const u64 word = state_words[word_index];
|
||||||
const u64 word = state_words[word_index] & ~off_word;
|
|
||||||
if (word == 0) {
|
if (word == 0) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
@ -564,7 +563,7 @@ private:
|
||||||
[[nodiscard]] std::pair<u64, u64> ModifiedRegion(u64 offset, u64 size) const noexcept {
|
[[nodiscard]] std::pair<u64, u64> ModifiedRegion(u64 offset, u64 size) const noexcept {
|
||||||
static_assert(type != Type::Untracked);
|
static_assert(type != Type::Untracked);
|
||||||
|
|
||||||
const u64* const untracked_words = Array<Type::Untracked>();
|
[[maybe_unused]] const u64* const untracked_words = Array<Type::Untracked>();
|
||||||
const u64* const state_words = Array<type>();
|
const u64* const state_words = Array<type>();
|
||||||
const u64 num_query_words = size / BYTES_PER_WORD + 1;
|
const u64 num_query_words = size / BYTES_PER_WORD + 1;
|
||||||
const u64 word_begin = offset / BYTES_PER_WORD;
|
const u64 word_begin = offset / BYTES_PER_WORD;
|
||||||
|
@ -574,8 +573,7 @@ private:
|
||||||
u64 begin = std::numeric_limits<u64>::max();
|
u64 begin = std::numeric_limits<u64>::max();
|
||||||
u64 end = 0;
|
u64 end = 0;
|
||||||
for (u64 word_index = word_begin; word_index < word_end; ++word_index) {
|
for (u64 word_index = word_begin; word_index < word_end; ++word_index) {
|
||||||
const u64 off_word = type == Type::GPU ? untracked_words[word_index] : 0;
|
const u64 word = state_words[word_index];
|
||||||
const u64 word = state_words[word_index] & ~off_word;
|
|
||||||
if (word == 0) {
|
if (word == 0) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
|
@ -123,7 +123,7 @@ void MaxwellDMA::Launch() {
|
||||||
convert_linear_2_blocklinear_addr(regs.offset_in + offset),
|
convert_linear_2_blocklinear_addr(regs.offset_in + offset),
|
||||||
tmp_buffer.data(), tmp_buffer.size());
|
tmp_buffer.data(), tmp_buffer.size());
|
||||||
memory_manager.WriteBlockCached(regs.offset_out + offset, tmp_buffer.data(),
|
memory_manager.WriteBlockCached(regs.offset_out + offset, tmp_buffer.data(),
|
||||||
tmp_buffer.size());
|
tmp_buffer.size());
|
||||||
}
|
}
|
||||||
} else if (is_src_pitch && !is_dst_pitch) {
|
} else if (is_src_pitch && !is_dst_pitch) {
|
||||||
UNIMPLEMENTED_IF(regs.line_length_in % 16 != 0);
|
UNIMPLEMENTED_IF(regs.line_length_in % 16 != 0);
|
||||||
|
@ -143,7 +143,7 @@ void MaxwellDMA::Launch() {
|
||||||
memory_manager.ReadBlockUnsafe(regs.offset_in, tmp_buffer.data(),
|
memory_manager.ReadBlockUnsafe(regs.offset_in, tmp_buffer.data(),
|
||||||
regs.line_length_in);
|
regs.line_length_in);
|
||||||
memory_manager.WriteBlockCached(regs.offset_out, tmp_buffer.data(),
|
memory_manager.WriteBlockCached(regs.offset_out, tmp_buffer.data(),
|
||||||
regs.line_length_in);
|
regs.line_length_in);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -3,6 +3,7 @@
|
||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
|
#include <utility>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include "common/common_types.h"
|
#include "common/common_types.h"
|
||||||
|
@ -26,8 +27,8 @@ public:
|
||||||
if (address >= start_address && address + size <= last_collection) [[likely]] {
|
if (address >= start_address && address + size <= last_collection) [[likely]] {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
size = (address + size + atomicy_side_mask) & atomicy_mask - address;
|
size = ((address + size + atomicity_size_mask) & atomicity_mask) - address;
|
||||||
address = address & atomicy_mask;
|
address = address & atomicity_mask;
|
||||||
if (!has_collected) [[unlikely]] {
|
if (!has_collected) [[unlikely]] {
|
||||||
reset_values();
|
reset_values();
|
||||||
has_collected = true;
|
has_collected = true;
|
||||||
|
@ -64,10 +65,10 @@ public:
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
static constexpr size_t atomicy_bits = 5;
|
static constexpr size_t atomicity_bits = 5;
|
||||||
static constexpr size_t atomicy_size = 1ULL << atomicy_bits;
|
static constexpr size_t atomicity_size = 1ULL << atomicity_bits;
|
||||||
static constexpr size_t atomicy_side_mask = atomicy_size - 1;
|
static constexpr size_t atomicity_size_mask = atomicity_size - 1;
|
||||||
static constexpr size_t atomicy_mask = ~atomicy_side_mask;
|
static constexpr size_t atomicity_mask = ~atomicity_size_mask;
|
||||||
GPUVAddr start_address{};
|
GPUVAddr start_address{};
|
||||||
GPUVAddr last_collection{};
|
GPUVAddr last_collection{};
|
||||||
size_t accumulated_size{};
|
size_t accumulated_size{};
|
||||||
|
|
|
@ -46,7 +46,7 @@ MemoryManager::MemoryManager(Core::System& system_, u64 address_space_bits_, u64
|
||||||
big_page_table_cpu.resize(big_page_table_size);
|
big_page_table_cpu.resize(big_page_table_size);
|
||||||
big_page_continous.resize(big_page_table_size / continous_bits, 0);
|
big_page_continous.resize(big_page_table_size / continous_bits, 0);
|
||||||
entries.resize(page_table_size / 32, 0);
|
entries.resize(page_table_size / 32, 0);
|
||||||
if (!Settings::IsGPULevelExtreme()) {
|
if (!Settings::IsGPULevelExtreme() && Settings::IsFastmemEnabled()) {
|
||||||
fastmem_arena = system.DeviceMemory().buffer.VirtualBasePointer();
|
fastmem_arena = system.DeviceMemory().buffer.VirtualBasePointer();
|
||||||
} else {
|
} else {
|
||||||
fastmem_arena = nullptr;
|
fastmem_arena = nullptr;
|
||||||
|
|
|
@ -107,7 +107,7 @@ public:
|
||||||
* will be returned;
|
* will be returned;
|
||||||
*/
|
*/
|
||||||
std::vector<std::pair<GPUVAddr, std::size_t>> GetSubmappedRange(GPUVAddr gpu_addr,
|
std::vector<std::pair<GPUVAddr, std::size_t>> GetSubmappedRange(GPUVAddr gpu_addr,
|
||||||
std::size_t size) const;
|
std::size_t size) const;
|
||||||
|
|
||||||
GPUVAddr Map(GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size,
|
GPUVAddr Map(GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size,
|
||||||
PTEKind kind = PTEKind::INVALID, bool is_big_pages = true);
|
PTEKind kind = PTEKind::INVALID, bool is_big_pages = true);
|
||||||
|
|
|
@ -97,7 +97,7 @@ public:
|
||||||
VideoCommon::CacheType which = VideoCommon::CacheType::All) = 0;
|
VideoCommon::CacheType which = VideoCommon::CacheType::All) = 0;
|
||||||
|
|
||||||
virtual void InnerInvalidation(std::span<const std::pair<VAddr, std::size_t>> sequences) {
|
virtual void InnerInvalidation(std::span<const std::pair<VAddr, std::size_t>> sequences) {
|
||||||
for (const auto [cpu_addr, size] : sequences) {
|
for (const auto& [cpu_addr, size] : sequences) {
|
||||||
InvalidateRegion(cpu_addr, size);
|
InvalidateRegion(cpu_addr, size);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -486,18 +486,18 @@ void RasterizerVulkan::InvalidateRegion(VAddr addr, u64 size, VideoCommon::Cache
|
||||||
void RasterizerVulkan::InnerInvalidation(std::span<const std::pair<VAddr, std::size_t>> sequences) {
|
void RasterizerVulkan::InnerInvalidation(std::span<const std::pair<VAddr, std::size_t>> sequences) {
|
||||||
{
|
{
|
||||||
std::scoped_lock lock{texture_cache.mutex};
|
std::scoped_lock lock{texture_cache.mutex};
|
||||||
for (const auto [addr, size] : sequences) {
|
for (const auto& [addr, size] : sequences) {
|
||||||
texture_cache.WriteMemory(addr, size);
|
texture_cache.WriteMemory(addr, size);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
{
|
{
|
||||||
std::scoped_lock lock{buffer_cache.mutex};
|
std::scoped_lock lock{buffer_cache.mutex};
|
||||||
for (const auto [addr, size] : sequences) {
|
for (const auto& [addr, size] : sequences) {
|
||||||
buffer_cache.WriteMemory(addr, size);
|
buffer_cache.WriteMemory(addr, size);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
{
|
{
|
||||||
for (const auto [addr, size] : sequences) {
|
for (const auto& [addr, size] : sequences) {
|
||||||
query_cache.InvalidateRegion(addr, size);
|
query_cache.InvalidateRegion(addr, size);
|
||||||
pipeline_cache.InvalidateRegion(addr, size);
|
pipeline_cache.InvalidateRegion(addr, size);
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue