VideoCore: implement channels on gpu caches.
This commit is contained in:
parent
c77b8df12e
commit
139ea93512
|
@ -10,13 +10,17 @@
|
|||
#include "core/hle/service/nvdrv/core/container.h"
|
||||
#include "core/hle/service/nvdrv/core/nvmap.h"
|
||||
#include "core/hle/service/nvdrv/devices/nvhost_as_gpu.h"
|
||||
#include "core/hle/service/nvdrv/devices/nvhost_gpu.h"
|
||||
#include "core/hle/service/nvdrv/nvdrv.h"
|
||||
#include "video_core/control/channel_state.h"
|
||||
#include "video_core/memory_manager.h"
|
||||
#include "video_core/rasterizer_interface.h"
|
||||
|
||||
namespace Service::Nvidia::Devices {
|
||||
|
||||
nvhost_as_gpu::nvhost_as_gpu(Core::System& system_, NvCore::Container& core)
|
||||
: nvdevice{system_}, container{core}, nvmap{core.GetNvMapFile()} {}
|
||||
nvhost_as_gpu::nvhost_as_gpu(Core::System& system_, Module& module_, NvCore::Container& core)
|
||||
: nvdevice{system_}, module{module_}, container{core}, nvmap{core.GetNvMapFile()},
|
||||
gmmu{std::make_shared<Tegra::MemoryManager>(system)} {}
|
||||
nvhost_as_gpu::~nvhost_as_gpu() = default;
|
||||
|
||||
NvResult nvhost_as_gpu::Ioctl1(DeviceFD fd, Ioctl command, const std::vector<u8>& input,
|
||||
|
@ -102,9 +106,9 @@ NvResult nvhost_as_gpu::AllocateSpace(const std::vector<u8>& input, std::vector<
|
|||
|
||||
const auto size{static_cast<u64>(params.pages) * static_cast<u64>(params.page_size)};
|
||||
if ((params.flags & AddressSpaceFlags::FixedOffset) != AddressSpaceFlags::None) {
|
||||
params.offset = *system.GPU().MemoryManager().AllocateFixed(params.offset, size);
|
||||
params.offset = *(gmmu->AllocateFixed(params.offset, size));
|
||||
} else {
|
||||
params.offset = system.GPU().MemoryManager().Allocate(size, params.align);
|
||||
params.offset = gmmu->Allocate(size, params.align);
|
||||
}
|
||||
|
||||
auto result = NvResult::Success;
|
||||
|
@ -124,8 +128,7 @@ NvResult nvhost_as_gpu::FreeSpace(const std::vector<u8>& input, std::vector<u8>&
|
|||
LOG_DEBUG(Service_NVDRV, "called, offset={:X}, pages={:X}, page_size={:X}", params.offset,
|
||||
params.pages, params.page_size);
|
||||
|
||||
system.GPU().MemoryManager().Unmap(params.offset,
|
||||
static_cast<std::size_t>(params.pages) * params.page_size);
|
||||
gmmu->Unmap(params.offset, static_cast<std::size_t>(params.pages) * params.page_size);
|
||||
|
||||
std::memcpy(output.data(), ¶ms, output.size());
|
||||
return NvResult::Success;
|
||||
|
@ -148,7 +151,7 @@ NvResult nvhost_as_gpu::Remap(const std::vector<u8>& input, std::vector<u8>& out
|
|||
// If nvmap handle is null, we should unmap instead.
|
||||
const auto offset{static_cast<GPUVAddr>(entry.offset) << 0x10};
|
||||
const auto size{static_cast<u64>(entry.pages) << 0x10};
|
||||
system.GPU().MemoryManager().Unmap(offset, size);
|
||||
gmmu->Unmap(offset, size);
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@ -162,8 +165,7 @@ NvResult nvhost_as_gpu::Remap(const std::vector<u8>& input, std::vector<u8>& out
|
|||
const auto offset{static_cast<GPUVAddr>(entry.offset) << 0x10};
|
||||
const auto size{static_cast<u64>(entry.pages) << 0x10};
|
||||
const auto map_offset{static_cast<u64>(entry.map_offset) << 0x10};
|
||||
const auto addr{
|
||||
system.GPU().MemoryManager().Map(object->address + map_offset, offset, size)};
|
||||
const auto addr{gmmu->Map(object->address + map_offset, offset, size)};
|
||||
|
||||
if (!addr) {
|
||||
LOG_CRITICAL(Service_NVDRV, "map returned an invalid address!");
|
||||
|
@ -186,13 +188,12 @@ NvResult nvhost_as_gpu::MapBufferEx(const std::vector<u8>& input, std::vector<u8
|
|||
params.flags, params.nvmap_handle, params.buffer_offset, params.mapping_size,
|
||||
params.offset);
|
||||
|
||||
auto& gpu = system.GPU();
|
||||
if ((params.flags & AddressSpaceFlags::Remap) != AddressSpaceFlags::None) {
|
||||
if (const auto buffer_map{FindBufferMap(params.offset)}; buffer_map) {
|
||||
const auto cpu_addr{static_cast<VAddr>(buffer_map->CpuAddr() + params.buffer_offset)};
|
||||
const auto gpu_addr{static_cast<GPUVAddr>(params.offset + params.buffer_offset)};
|
||||
|
||||
if (!gpu.MemoryManager().Map(cpu_addr, gpu_addr, params.mapping_size)) {
|
||||
if (!gmmu->Map(cpu_addr, gpu_addr, params.mapping_size)) {
|
||||
LOG_CRITICAL(Service_NVDRV,
|
||||
"remap failed, flags={:X}, nvmap_handle={:X}, buffer_offset={}, "
|
||||
"mapping_size = {}, offset={}",
|
||||
|
@ -238,9 +239,9 @@ NvResult nvhost_as_gpu::MapBufferEx(const std::vector<u8>& input, std::vector<u8
|
|||
|
||||
const bool is_alloc{(params.flags & AddressSpaceFlags::FixedOffset) == AddressSpaceFlags::None};
|
||||
if (is_alloc) {
|
||||
params.offset = gpu.MemoryManager().MapAllocate(physical_address, size, page_size);
|
||||
params.offset = gmmu->MapAllocate(physical_address, size, page_size);
|
||||
} else {
|
||||
params.offset = gpu.MemoryManager().Map(physical_address, params.offset, size);
|
||||
params.offset = gmmu->Map(physical_address, params.offset, size);
|
||||
}
|
||||
|
||||
auto result = NvResult::Success;
|
||||
|
@ -262,7 +263,7 @@ NvResult nvhost_as_gpu::UnmapBuffer(const std::vector<u8>& input, std::vector<u8
|
|||
LOG_DEBUG(Service_NVDRV, "called, offset=0x{:X}", params.offset);
|
||||
|
||||
if (const auto size{RemoveBufferMap(params.offset)}; size) {
|
||||
system.GPU().MemoryManager().Unmap(params.offset, *size);
|
||||
gmmu->Unmap(params.offset, *size);
|
||||
} else {
|
||||
LOG_ERROR(Service_NVDRV, "invalid offset=0x{:X}", params.offset);
|
||||
}
|
||||
|
@ -274,9 +275,10 @@ NvResult nvhost_as_gpu::UnmapBuffer(const std::vector<u8>& input, std::vector<u8
|
|||
NvResult nvhost_as_gpu::BindChannel(const std::vector<u8>& input, std::vector<u8>& output) {
|
||||
IoctlBindChannel params{};
|
||||
std::memcpy(¶ms, input.data(), input.size());
|
||||
LOG_WARNING(Service_NVDRV, "(STUBBED) called, fd={:X}", params.fd);
|
||||
LOG_DEBUG(Service_NVDRV, "called, fd={:X}", params.fd);
|
||||
|
||||
channel = params.fd;
|
||||
auto gpu_channel_device = module.GetDevice<nvhost_gpu>(params.fd);
|
||||
gpu_channel_device->channel_state->memory_manager = gmmu;
|
||||
return NvResult::Success;
|
||||
}
|
||||
|
||||
|
|
|
@ -13,6 +13,14 @@
|
|||
#include "common/swap.h"
|
||||
#include "core/hle/service/nvdrv/devices/nvdevice.h"
|
||||
|
||||
namespace Tegra {
|
||||
class MemoryManager;
|
||||
} // namespace Tegra
|
||||
|
||||
namespace Service::Nvidia {
|
||||
class Module;
|
||||
}
|
||||
|
||||
namespace Service::Nvidia::NvCore {
|
||||
class Container;
|
||||
class NvMap;
|
||||
|
@ -34,7 +42,7 @@ DECLARE_ENUM_FLAG_OPERATORS(AddressSpaceFlags);
|
|||
|
||||
class nvhost_as_gpu final : public nvdevice {
|
||||
public:
|
||||
explicit nvhost_as_gpu(Core::System& system_, NvCore::Container& core);
|
||||
explicit nvhost_as_gpu(Core::System& system_, Module& module, NvCore::Container& core);
|
||||
~nvhost_as_gpu() override;
|
||||
|
||||
NvResult Ioctl1(DeviceFD fd, Ioctl command, const std::vector<u8>& input,
|
||||
|
@ -187,9 +195,13 @@ private:
|
|||
void AddBufferMap(GPUVAddr gpu_addr, std::size_t size, VAddr cpu_addr, bool is_allocated);
|
||||
std::optional<std::size_t> RemoveBufferMap(GPUVAddr gpu_addr);
|
||||
|
||||
Module& module;
|
||||
|
||||
NvCore::Container& container;
|
||||
NvCore::NvMap& nvmap;
|
||||
|
||||
std::shared_ptr<Tegra::MemoryManager> gmmu;
|
||||
|
||||
// This is expected to be ordered, therefore we must use a map, not unordered_map
|
||||
std::map<GPUVAddr, BufferMap> buffer_mappings;
|
||||
};
|
||||
|
|
|
@ -11,12 +11,14 @@
|
|||
#include "core/hle/service/nvdrv/devices/nvhost_gpu.h"
|
||||
#include "core/hle/service/nvdrv/nvdrv.h"
|
||||
#include "core/memory.h"
|
||||
#include "video_core/control/channel_state.h"
|
||||
#include "video_core/engines/puller.h"
|
||||
#include "video_core/gpu.h"
|
||||
|
||||
namespace Service::Nvidia::Devices {
|
||||
namespace {
|
||||
Tegra::CommandHeader BuildFenceAction(Tegra::GPU::FenceOperation op, u32 syncpoint_id) {
|
||||
Tegra::GPU::FenceAction result{};
|
||||
Tegra::CommandHeader BuildFenceAction(Tegra::Engines::Puller::FenceOperation op, u32 syncpoint_id) {
|
||||
Tegra::Engines::Puller::FenceAction result{};
|
||||
result.op.Assign(op);
|
||||
result.syncpoint_id.Assign(syncpoint_id);
|
||||
return {result.raw};
|
||||
|
@ -26,7 +28,8 @@ Tegra::CommandHeader BuildFenceAction(Tegra::GPU::FenceOperation op, u32 syncpoi
|
|||
nvhost_gpu::nvhost_gpu(Core::System& system_, EventInterface& events_interface_,
|
||||
NvCore::Container& core_)
|
||||
: nvdevice{system_}, events_interface{events_interface_}, core{core_},
|
||||
syncpoint_manager{core_.GetSyncpointManager()}, nvmap{core.GetNvMapFile()} {
|
||||
syncpoint_manager{core_.GetSyncpointManager()}, nvmap{core.GetNvMapFile()},
|
||||
channel_state{system.GPU().AllocateChannel()} {
|
||||
channel_fence.id = syncpoint_manager.AllocateSyncpoint();
|
||||
channel_fence.value = system_.GPU().GetSyncpointValue(channel_fence.id);
|
||||
sm_exception_breakpoint_int_report_event =
|
||||
|
@ -180,6 +183,12 @@ NvResult nvhost_gpu::AllocGPFIFOEx2(const std::vector<u8>& input, std::vector<u8
|
|||
params.num_entries, params.flags, params.unk0, params.unk1, params.unk2,
|
||||
params.unk3);
|
||||
|
||||
if (channel_state->initiated) {
|
||||
LOG_CRITICAL(Service_NVDRV, "Already allocated!");
|
||||
return NvResult::AlreadyAllocated;
|
||||
}
|
||||
|
||||
system.GPU().InitChannel(*channel_state);
|
||||
channel_fence.value = system.GPU().GetSyncpointValue(channel_fence.id);
|
||||
|
||||
params.fence_out = channel_fence;
|
||||
|
@ -206,7 +215,7 @@ static std::vector<Tegra::CommandHeader> BuildWaitCommandList(NvFence fence) {
|
|||
{fence.value},
|
||||
Tegra::BuildCommandHeader(Tegra::BufferMethods::FenceAction, 1,
|
||||
Tegra::SubmissionMode::Increasing),
|
||||
BuildFenceAction(Tegra::GPU::FenceOperation::Acquire, fence.id),
|
||||
BuildFenceAction(Tegra::Engines::Puller::FenceOperation::Acquire, fence.id),
|
||||
};
|
||||
}
|
||||
|
||||
|
@ -220,7 +229,8 @@ static std::vector<Tegra::CommandHeader> BuildIncrementCommandList(NvFence fence
|
|||
for (u32 count = 0; count < add_increment; ++count) {
|
||||
result.emplace_back(Tegra::BuildCommandHeader(Tegra::BufferMethods::FenceAction, 1,
|
||||
Tegra::SubmissionMode::Increasing));
|
||||
result.emplace_back(BuildFenceAction(Tegra::GPU::FenceOperation::Increment, fence.id));
|
||||
result.emplace_back(
|
||||
BuildFenceAction(Tegra::Engines::Puller::FenceOperation::Increment, fence.id));
|
||||
}
|
||||
|
||||
return result;
|
||||
|
@ -247,11 +257,13 @@ NvResult nvhost_gpu::SubmitGPFIFOImpl(IoctlSubmitGpfifo& params, std::vector<u8>
|
|||
|
||||
auto& gpu = system.GPU();
|
||||
|
||||
const auto bind_id = channel_state->bind_id;
|
||||
|
||||
params.fence_out.id = channel_fence.id;
|
||||
|
||||
if (params.flags.add_wait.Value() &&
|
||||
!syncpoint_manager.IsSyncpointExpired(params.fence_out.id, params.fence_out.value)) {
|
||||
gpu.PushGPUEntries(Tegra::CommandList{BuildWaitCommandList(params.fence_out)});
|
||||
gpu.PushGPUEntries(bind_id, Tegra::CommandList{BuildWaitCommandList(params.fence_out)});
|
||||
}
|
||||
|
||||
if (params.flags.add_increment.Value() || params.flags.increment.Value()) {
|
||||
|
@ -262,15 +274,15 @@ NvResult nvhost_gpu::SubmitGPFIFOImpl(IoctlSubmitGpfifo& params, std::vector<u8>
|
|||
params.fence_out.value = syncpoint_manager.GetSyncpointMax(params.fence_out.id);
|
||||
}
|
||||
|
||||
gpu.PushGPUEntries(std::move(entries));
|
||||
gpu.PushGPUEntries(bind_id, std::move(entries));
|
||||
|
||||
if (params.flags.add_increment.Value()) {
|
||||
if (params.flags.suppress_wfi) {
|
||||
gpu.PushGPUEntries(Tegra::CommandList{
|
||||
BuildIncrementCommandList(params.fence_out, params.AddIncrementValue())});
|
||||
gpu.PushGPUEntries(bind_id, Tegra::CommandList{BuildIncrementCommandList(
|
||||
params.fence_out, params.AddIncrementValue())});
|
||||
} else {
|
||||
gpu.PushGPUEntries(Tegra::CommandList{
|
||||
BuildIncrementWithWfiCommandList(params.fence_out, params.AddIncrementValue())});
|
||||
gpu.PushGPUEntries(bind_id, Tegra::CommandList{BuildIncrementWithWfiCommandList(
|
||||
params.fence_out, params.AddIncrementValue())});
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -13,6 +13,12 @@
|
|||
#include "core/hle/service/nvdrv/nvdata.h"
|
||||
#include "video_core/dma_pusher.h"
|
||||
|
||||
namespace Tegra {
|
||||
namespace Control {
|
||||
struct ChannelState;
|
||||
}
|
||||
} // namespace Tegra
|
||||
|
||||
namespace Service::Nvidia {
|
||||
|
||||
namespace NvCore {
|
||||
|
@ -26,6 +32,7 @@ class EventInterface;
|
|||
|
||||
namespace Service::Nvidia::Devices {
|
||||
|
||||
class nvhost_as_gpu;
|
||||
class nvmap;
|
||||
class nvhost_gpu final : public nvdevice {
|
||||
public:
|
||||
|
@ -46,6 +53,7 @@ public:
|
|||
Kernel::KEvent* QueryEvent(u32 event_id) override;
|
||||
|
||||
private:
|
||||
friend class nvhost_as_gpu;
|
||||
enum class CtxObjects : u32_le {
|
||||
Ctx2D = 0x902D,
|
||||
Ctx3D = 0xB197,
|
||||
|
@ -204,6 +212,7 @@ private:
|
|||
NvCore::Container& core;
|
||||
NvCore::SyncpointManager& syncpoint_manager;
|
||||
NvCore::NvMap& nvmap;
|
||||
std::shared_ptr<Tegra::Control::ChannelState> channel_state;
|
||||
NvFence channel_fence;
|
||||
|
||||
// Events
|
||||
|
|
|
@ -168,7 +168,7 @@ NvResult nvmap::IocFromId(const std::vector<u8>& input, std::vector<u8>& output)
|
|||
IocFromIdParams params;
|
||||
std::memcpy(¶ms, input.data(), sizeof(params));
|
||||
|
||||
LOG_DEBUG(Service_NVDRV, "called, id:{}");
|
||||
LOG_DEBUG(Service_NVDRV, "called, id:{}", params.id);
|
||||
|
||||
// Handles and IDs are always the same value in nvmap however IDs can be used globally given the
|
||||
// right permissions.
|
||||
|
|
|
@ -74,7 +74,7 @@ Module::Module(Core::System& system)
|
|||
: service_context{system, "nvdrv"}, events_interface{*this}, container{system.GPU()} {
|
||||
builders["/dev/nvhost-as-gpu"] = [this, &system](DeviceFD fd) {
|
||||
std::shared_ptr<Devices::nvdevice> device =
|
||||
std::make_shared<Devices::nvhost_as_gpu>(system, container);
|
||||
std::make_shared<Devices::nvhost_as_gpu>(system, *this, container);
|
||||
return open_files.emplace(fd, device).first;
|
||||
};
|
||||
builders["/dev/nvhost-gpu"] = [this, &system](DeviceFD fd) {
|
||||
|
|
|
@ -35,6 +35,12 @@ add_library(video_core STATIC
|
|||
command_classes/vic.h
|
||||
compatible_formats.cpp
|
||||
compatible_formats.h
|
||||
control/channel_state.cpp
|
||||
control/channel_state.h
|
||||
control/channel_state_cache.cpp
|
||||
control/channel_state_cache.h
|
||||
control/scheduler.cpp
|
||||
control/scheduler.h
|
||||
delayed_destruction_ring.h
|
||||
dirty_flags.cpp
|
||||
dirty_flags.h
|
||||
|
@ -54,6 +60,8 @@ add_library(video_core STATIC
|
|||
engines/maxwell_3d.h
|
||||
engines/maxwell_dma.cpp
|
||||
engines/maxwell_dma.h
|
||||
engines/puller.cpp
|
||||
engines/puller.h
|
||||
framebuffer_config.h
|
||||
macro/macro.cpp
|
||||
macro/macro.h
|
||||
|
|
|
@ -5,7 +5,6 @@
|
|||
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <deque>
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
#include <numeric>
|
||||
|
@ -23,6 +22,7 @@
|
|||
#include "common/settings.h"
|
||||
#include "core/memory.h"
|
||||
#include "video_core/buffer_cache/buffer_base.h"
|
||||
#include "video_core/control/channel_state_cache.h"
|
||||
#include "video_core/delayed_destruction_ring.h"
|
||||
#include "video_core/dirty_flags.h"
|
||||
#include "video_core/engines/kepler_compute.h"
|
||||
|
@ -56,7 +56,7 @@ using UniformBufferSizes = std::array<std::array<u32, NUM_GRAPHICS_UNIFORM_BUFFE
|
|||
using ComputeUniformBufferSizes = std::array<u32, NUM_COMPUTE_UNIFORM_BUFFERS>;
|
||||
|
||||
template <typename P>
|
||||
class BufferCache {
|
||||
class BufferCache : public VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> {
|
||||
|
||||
// Page size for caching purposes.
|
||||
// This is unrelated to the CPU page size and it can be changed as it seems optimal.
|
||||
|
@ -116,10 +116,7 @@ public:
|
|||
static constexpr u32 DEFAULT_SKIP_CACHE_SIZE = static_cast<u32>(4_KiB);
|
||||
|
||||
explicit BufferCache(VideoCore::RasterizerInterface& rasterizer_,
|
||||
Tegra::Engines::Maxwell3D& maxwell3d_,
|
||||
Tegra::Engines::KeplerCompute& kepler_compute_,
|
||||
Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
|
||||
Runtime& runtime_);
|
||||
Core::Memory::Memory& cpu_memory_, Runtime& runtime_);
|
||||
|
||||
void TickFrame();
|
||||
|
||||
|
@ -367,9 +364,6 @@ private:
|
|||
void ClearDownload(IntervalType subtract_interval);
|
||||
|
||||
VideoCore::RasterizerInterface& rasterizer;
|
||||
Tegra::Engines::Maxwell3D& maxwell3d;
|
||||
Tegra::Engines::KeplerCompute& kepler_compute;
|
||||
Tegra::MemoryManager& gpu_memory;
|
||||
Core::Memory::Memory& cpu_memory;
|
||||
|
||||
SlotVector<Buffer> slot_buffers;
|
||||
|
@ -444,12 +438,8 @@ private:
|
|||
|
||||
template <class P>
|
||||
BufferCache<P>::BufferCache(VideoCore::RasterizerInterface& rasterizer_,
|
||||
Tegra::Engines::Maxwell3D& maxwell3d_,
|
||||
Tegra::Engines::KeplerCompute& kepler_compute_,
|
||||
Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
|
||||
Runtime& runtime_)
|
||||
: runtime{runtime_}, rasterizer{rasterizer_}, maxwell3d{maxwell3d_},
|
||||
kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_} {
|
||||
Core::Memory::Memory& cpu_memory_, Runtime& runtime_)
|
||||
: runtime{runtime_}, rasterizer{rasterizer_}, cpu_memory{cpu_memory_} {
|
||||
// Ensure the first slot is used for the null buffer
|
||||
void(slot_buffers.insert(runtime, NullBufferParams{}));
|
||||
common_ranges.clear();
|
||||
|
@ -552,8 +542,8 @@ void BufferCache<P>::ClearDownload(IntervalType subtract_interval) {
|
|||
|
||||
template <class P>
|
||||
bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount) {
|
||||
const std::optional<VAddr> cpu_src_address = gpu_memory.GpuToCpuAddress(src_address);
|
||||
const std::optional<VAddr> cpu_dest_address = gpu_memory.GpuToCpuAddress(dest_address);
|
||||
const std::optional<VAddr> cpu_src_address = gpu_memory->GpuToCpuAddress(src_address);
|
||||
const std::optional<VAddr> cpu_dest_address = gpu_memory->GpuToCpuAddress(dest_address);
|
||||
if (!cpu_src_address || !cpu_dest_address) {
|
||||
return false;
|
||||
}
|
||||
|
@ -611,7 +601,7 @@ bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 am
|
|||
|
||||
template <class P>
|
||||
bool BufferCache<P>::DMAClear(GPUVAddr dst_address, u64 amount, u32 value) {
|
||||
const std::optional<VAddr> cpu_dst_address = gpu_memory.GpuToCpuAddress(dst_address);
|
||||
const std::optional<VAddr> cpu_dst_address = gpu_memory->GpuToCpuAddress(dst_address);
|
||||
if (!cpu_dst_address) {
|
||||
return false;
|
||||
}
|
||||
|
@ -635,7 +625,7 @@ bool BufferCache<P>::DMAClear(GPUVAddr dst_address, u64 amount, u32 value) {
|
|||
template <class P>
|
||||
void BufferCache<P>::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr,
|
||||
u32 size) {
|
||||
const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
|
||||
const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
|
||||
const Binding binding{
|
||||
.cpu_addr = *cpu_addr,
|
||||
.size = size,
|
||||
|
@ -673,7 +663,7 @@ void BufferCache<P>::BindHostGeometryBuffers(bool is_indexed) {
|
|||
if (is_indexed) {
|
||||
BindHostIndexBuffer();
|
||||
} else if constexpr (!HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT) {
|
||||
const auto& regs = maxwell3d.regs;
|
||||
const auto& regs = maxwell3d->regs;
|
||||
if (regs.draw.topology == Maxwell::PrimitiveTopology::Quads) {
|
||||
runtime.BindQuadArrayIndexBuffer(regs.vertex_buffer.first, regs.vertex_buffer.count);
|
||||
}
|
||||
|
@ -733,7 +723,7 @@ void BufferCache<P>::BindGraphicsStorageBuffer(size_t stage, size_t ssbo_index,
|
|||
enabled_storage_buffers[stage] |= 1U << ssbo_index;
|
||||
written_storage_buffers[stage] |= (is_written ? 1U : 0U) << ssbo_index;
|
||||
|
||||
const auto& cbufs = maxwell3d.state.shader_stages[stage];
|
||||
const auto& cbufs = maxwell3d->state.shader_stages[stage];
|
||||
const GPUVAddr ssbo_addr = cbufs.const_buffers[cbuf_index].address + cbuf_offset;
|
||||
storage_buffers[stage][ssbo_index] = StorageBufferBinding(ssbo_addr);
|
||||
}
|
||||
|
@ -770,7 +760,7 @@ void BufferCache<P>::BindComputeStorageBuffer(size_t ssbo_index, u32 cbuf_index,
|
|||
enabled_compute_storage_buffers |= 1U << ssbo_index;
|
||||
written_compute_storage_buffers |= (is_written ? 1U : 0U) << ssbo_index;
|
||||
|
||||
const auto& launch_desc = kepler_compute.launch_description;
|
||||
const auto& launch_desc = kepler_compute->launch_description;
|
||||
ASSERT(((launch_desc.const_buffer_enable_mask >> cbuf_index) & 1) != 0);
|
||||
|
||||
const auto& cbufs = launch_desc.const_buffer_config;
|
||||
|
@ -991,19 +981,19 @@ void BufferCache<P>::BindHostIndexBuffer() {
|
|||
const u32 size = index_buffer.size;
|
||||
SynchronizeBuffer(buffer, index_buffer.cpu_addr, size);
|
||||
if constexpr (HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT) {
|
||||
const u32 new_offset = offset + maxwell3d.regs.index_array.first *
|
||||
maxwell3d.regs.index_array.FormatSizeInBytes();
|
||||
const u32 new_offset = offset + maxwell3d->regs.index_array.first *
|
||||
maxwell3d->regs.index_array.FormatSizeInBytes();
|
||||
runtime.BindIndexBuffer(buffer, new_offset, size);
|
||||
} else {
|
||||
runtime.BindIndexBuffer(maxwell3d.regs.draw.topology, maxwell3d.regs.index_array.format,
|
||||
maxwell3d.regs.index_array.first, maxwell3d.regs.index_array.count,
|
||||
buffer, offset, size);
|
||||
runtime.BindIndexBuffer(maxwell3d->regs.draw.topology, maxwell3d->regs.index_array.format,
|
||||
maxwell3d->regs.index_array.first,
|
||||
maxwell3d->regs.index_array.count, buffer, offset, size);
|
||||
}
|
||||
}
|
||||
|
||||
template <class P>
|
||||
void BufferCache<P>::BindHostVertexBuffers() {
|
||||
auto& flags = maxwell3d.dirty.flags;
|
||||
auto& flags = maxwell3d->dirty.flags;
|
||||
for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) {
|
||||
const Binding& binding = vertex_buffers[index];
|
||||
Buffer& buffer = slot_buffers[binding.buffer_id];
|
||||
|
@ -1014,7 +1004,7 @@ void BufferCache<P>::BindHostVertexBuffers() {
|
|||
}
|
||||
flags[Dirty::VertexBuffer0 + index] = false;
|
||||
|
||||
const u32 stride = maxwell3d.regs.vertex_array[index].stride;
|
||||
const u32 stride = maxwell3d->regs.vertex_array[index].stride;
|
||||
const u32 offset = buffer.Offset(binding.cpu_addr);
|
||||
runtime.BindVertexBuffer(index, buffer, offset, binding.size, stride);
|
||||
}
|
||||
|
@ -1154,7 +1144,7 @@ void BufferCache<P>::BindHostGraphicsTextureBuffers(size_t stage) {
|
|||
|
||||
template <class P>
|
||||
void BufferCache<P>::BindHostTransformFeedbackBuffers() {
|
||||
if (maxwell3d.regs.tfb_enabled == 0) {
|
||||
if (maxwell3d->regs.tfb_enabled == 0) {
|
||||
return;
|
||||
}
|
||||
for (u32 index = 0; index < NUM_TRANSFORM_FEEDBACK_BUFFERS; ++index) {
|
||||
|
@ -1262,8 +1252,8 @@ template <class P>
|
|||
void BufferCache<P>::UpdateIndexBuffer() {
|
||||
// We have to check for the dirty flags and index count
|
||||
// The index count is currently changed without updating the dirty flags
|
||||
const auto& index_array = maxwell3d.regs.index_array;
|
||||
auto& flags = maxwell3d.dirty.flags;
|
||||
const auto& index_array = maxwell3d->regs.index_array;
|
||||
auto& flags = maxwell3d->dirty.flags;
|
||||
if (!flags[Dirty::IndexBuffer] && last_index_count == index_array.count) {
|
||||
return;
|
||||
}
|
||||
|
@ -1272,7 +1262,7 @@ void BufferCache<P>::UpdateIndexBuffer() {
|
|||
|
||||
const GPUVAddr gpu_addr_begin = index_array.StartAddress();
|
||||
const GPUVAddr gpu_addr_end = index_array.EndAddress();
|
||||
const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr_begin);
|
||||
const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr_begin);
|
||||
const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin);
|
||||
const u32 draw_size = (index_array.count + index_array.first) * index_array.FormatSizeInBytes();
|
||||
const u32 size = std::min(address_size, draw_size);
|
||||
|
@ -1289,8 +1279,8 @@ void BufferCache<P>::UpdateIndexBuffer() {
|
|||
|
||||
template <class P>
|
||||
void BufferCache<P>::UpdateVertexBuffers() {
|
||||
auto& flags = maxwell3d.dirty.flags;
|
||||
if (!maxwell3d.dirty.flags[Dirty::VertexBuffers]) {
|
||||
auto& flags = maxwell3d->dirty.flags;
|
||||
if (!maxwell3d->dirty.flags[Dirty::VertexBuffers]) {
|
||||
return;
|
||||
}
|
||||
flags[Dirty::VertexBuffers] = false;
|
||||
|
@ -1302,28 +1292,15 @@ void BufferCache<P>::UpdateVertexBuffers() {
|
|||
|
||||
template <class P>
|
||||
void BufferCache<P>::UpdateVertexBuffer(u32 index) {
|
||||
if (!maxwell3d.dirty.flags[Dirty::VertexBuffer0 + index]) {
|
||||
if (!maxwell3d->dirty.flags[Dirty::VertexBuffer0 + index]) {
|
||||
return;
|
||||
}
|
||||
const auto& array = maxwell3d.regs.vertex_array[index];
|
||||
const auto& limit = maxwell3d.regs.vertex_array_limit[index];
|
||||
const auto& array = maxwell3d->regs.vertex_array[index];
|
||||
const auto& limit = maxwell3d->regs.vertex_array_limit[index];
|
||||
const GPUVAddr gpu_addr_begin = array.StartAddress();
|
||||
const GPUVAddr gpu_addr_end = limit.LimitAddress() + 1;
|
||||
const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr_begin);
|
||||
u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin);
|
||||
if (address_size >= 64_MiB) {
|
||||
// Reported vertex buffer size is very large, cap to mapped buffer size
|
||||
GPUVAddr submapped_addr_end = gpu_addr_begin;
|
||||
|
||||
const auto ranges{gpu_memory.GetSubmappedRange(gpu_addr_begin, address_size)};
|
||||
if (ranges.size() > 0) {
|
||||
const auto& [addr, size] = *ranges.begin();
|
||||
submapped_addr_end = addr + size;
|
||||
}
|
||||
|
||||
address_size =
|
||||
std::min(address_size, static_cast<u32>(submapped_addr_end - gpu_addr_begin));
|
||||
}
|
||||
const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr_begin);
|
||||
const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin);
|
||||
const u32 size = address_size; // TODO: Analyze stride and number of vertices
|
||||
if (array.enable == 0 || size == 0 || !cpu_addr) {
|
||||
vertex_buffers[index] = NULL_BINDING;
|
||||
|
@ -1382,7 +1359,7 @@ void BufferCache<P>::UpdateTextureBuffers(size_t stage) {
|
|||
|
||||
template <class P>
|
||||
void BufferCache<P>::UpdateTransformFeedbackBuffers() {
|
||||
if (maxwell3d.regs.tfb_enabled == 0) {
|
||||
if (maxwell3d->regs.tfb_enabled == 0) {
|
||||
return;
|
||||
}
|
||||
for (u32 index = 0; index < NUM_TRANSFORM_FEEDBACK_BUFFERS; ++index) {
|
||||
|
@ -1392,10 +1369,10 @@ void BufferCache<P>::UpdateTransformFeedbackBuffers() {
|
|||
|
||||
template <class P>
|
||||
void BufferCache<P>::UpdateTransformFeedbackBuffer(u32 index) {
|
||||
const auto& binding = maxwell3d.regs.tfb_bindings[index];
|
||||
const auto& binding = maxwell3d->regs.tfb_bindings[index];
|
||||
const GPUVAddr gpu_addr = binding.Address() + binding.buffer_offset;
|
||||
const u32 size = binding.buffer_size;
|
||||
const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
|
||||
const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
|
||||
if (binding.buffer_enable == 0 || size == 0 || !cpu_addr) {
|
||||
transform_feedback_buffers[index] = NULL_BINDING;
|
||||
return;
|
||||
|
@ -1414,10 +1391,10 @@ void BufferCache<P>::UpdateComputeUniformBuffers() {
|
|||
ForEachEnabledBit(enabled_compute_uniform_buffer_mask, [&](u32 index) {
|
||||
Binding& binding = compute_uniform_buffers[index];
|
||||
binding = NULL_BINDING;
|
||||
const auto& launch_desc = kepler_compute.launch_description;
|
||||
const auto& launch_desc = kepler_compute->launch_description;
|
||||
if (((launch_desc.const_buffer_enable_mask >> index) & 1) != 0) {
|
||||
const auto& cbuf = launch_desc.const_buffer_config[index];
|
||||
const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(cbuf.Address());
|
||||
const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(cbuf.Address());
|
||||
if (cpu_addr) {
|
||||
binding.cpu_addr = *cpu_addr;
|
||||
binding.size = cbuf.size;
|
||||
|
@ -1831,7 +1808,7 @@ void BufferCache<P>::NotifyBufferDeletion() {
|
|||
dirty_uniform_buffers.fill(~u32{0});
|
||||
uniform_buffer_binding_sizes.fill({});
|
||||
}
|
||||
auto& flags = maxwell3d.dirty.flags;
|
||||
auto& flags = maxwell3d->dirty.flags;
|
||||
flags[Dirty::IndexBuffer] = true;
|
||||
flags[Dirty::VertexBuffers] = true;
|
||||
for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) {
|
||||
|
@ -1842,9 +1819,9 @@ void BufferCache<P>::NotifyBufferDeletion() {
|
|||
|
||||
template <class P>
|
||||
typename BufferCache<P>::Binding BufferCache<P>::StorageBufferBinding(GPUVAddr ssbo_addr) const {
|
||||
const GPUVAddr gpu_addr = gpu_memory.Read<u64>(ssbo_addr);
|
||||
const u32 size = gpu_memory.Read<u32>(ssbo_addr + 8);
|
||||
const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
|
||||
const GPUVAddr gpu_addr = gpu_memory->Read<u64>(ssbo_addr);
|
||||
const u32 size = gpu_memory->Read<u32>(ssbo_addr + 8);
|
||||
const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
|
||||
if (!cpu_addr || size == 0) {
|
||||
return NULL_BINDING;
|
||||
}
|
||||
|
@ -1859,7 +1836,7 @@ typename BufferCache<P>::Binding BufferCache<P>::StorageBufferBinding(GPUVAddr s
|
|||
template <class P>
|
||||
typename BufferCache<P>::TextureBufferBinding BufferCache<P>::GetTextureBufferBinding(
|
||||
GPUVAddr gpu_addr, u32 size, PixelFormat format) {
|
||||
const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
|
||||
const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
|
||||
TextureBufferBinding binding;
|
||||
if (!cpu_addr || size == 0) {
|
||||
binding.cpu_addr = 0;
|
||||
|
|
44
src/video_core/control/channel_state.cpp
Normal file
44
src/video_core/control/channel_state.cpp
Normal file
|
@ -0,0 +1,44 @@
|
|||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "video_core/control/channel_state.h"
|
||||
#include "video_core/dma_pusher.h"
|
||||
#include "video_core/engines/fermi_2d.h"
|
||||
#include "video_core/engines/kepler_compute.h"
|
||||
#include "video_core/engines/kepler_memory.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/engines/maxwell_dma.h"
|
||||
#include "video_core/engines/puller.h"
|
||||
#include "video_core/memory_manager.h"
|
||||
|
||||
namespace Tegra::Control {
|
||||
|
||||
ChannelState::ChannelState(s32 bind_id_) {
|
||||
bind_id = bind_id_;
|
||||
initiated = false;
|
||||
}
|
||||
|
||||
void ChannelState::Init(Core::System& system, GPU& gpu) {
|
||||
ASSERT(memory_manager);
|
||||
dma_pusher = std::make_unique<Tegra::DmaPusher>(system, gpu, *memory_manager, *this);
|
||||
maxwell_3d = std::make_unique<Engines::Maxwell3D>(system, *memory_manager);
|
||||
fermi_2d = std::make_unique<Engines::Fermi2D>();
|
||||
kepler_compute = std::make_unique<Engines::KeplerCompute>(system, *memory_manager);
|
||||
maxwell_dma = std::make_unique<Engines::MaxwellDMA>(system, *memory_manager);
|
||||
kepler_memory = std::make_unique<Engines::KeplerMemory>(system, *memory_manager);
|
||||
initiated = true;
|
||||
}
|
||||
|
||||
void ChannelState::BindRasterizer(VideoCore::RasterizerInterface* rasterizer) {
|
||||
dma_pusher->BindRasterizer(rasterizer);
|
||||
memory_manager->BindRasterizer(rasterizer);
|
||||
maxwell_3d->BindRasterizer(rasterizer);
|
||||
fermi_2d->BindRasterizer(rasterizer);
|
||||
kepler_memory->BindRasterizer(rasterizer);
|
||||
kepler_compute->BindRasterizer(rasterizer);
|
||||
maxwell_dma->BindRasterizer(rasterizer);
|
||||
}
|
||||
|
||||
} // namespace Tegra::Control
|
69
src/video_core/control/channel_state.h
Normal file
69
src/video_core/control/channel_state.h
Normal file
|
@ -0,0 +1,69 @@
|
|||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
|
||||
#include "common/common_types.h"
|
||||
|
||||
namespace Core {
|
||||
class System;
|
||||
}
|
||||
|
||||
namespace VideoCore {
|
||||
class RasterizerInterface;
|
||||
}
|
||||
|
||||
namespace Tegra {
|
||||
|
||||
class GPU;
|
||||
|
||||
namespace Engines {
|
||||
class Puller;
|
||||
class Fermi2D;
|
||||
class Maxwell3D;
|
||||
class MaxwellDMA;
|
||||
class KeplerCompute;
|
||||
class KeplerMemory;
|
||||
} // namespace Engines
|
||||
|
||||
class MemoryManager;
|
||||
class DmaPusher;
|
||||
|
||||
namespace Control {
|
||||
|
||||
struct ChannelState {
|
||||
ChannelState(s32 bind_id);
|
||||
ChannelState(const ChannelState& state) = delete;
|
||||
ChannelState& operator=(const ChannelState&) = delete;
|
||||
ChannelState(ChannelState&& other) noexcept = default;
|
||||
ChannelState& operator=(ChannelState&& other) noexcept = default;
|
||||
|
||||
void Init(Core::System& system, GPU& gpu);
|
||||
|
||||
void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);
|
||||
|
||||
s32 bind_id = -1;
|
||||
/// 3D engine
|
||||
std::unique_ptr<Engines::Maxwell3D> maxwell_3d;
|
||||
/// 2D engine
|
||||
std::unique_ptr<Engines::Fermi2D> fermi_2d;
|
||||
/// Compute engine
|
||||
std::unique_ptr<Engines::KeplerCompute> kepler_compute;
|
||||
/// DMA engine
|
||||
std::unique_ptr<Engines::MaxwellDMA> maxwell_dma;
|
||||
/// Inline memory engine
|
||||
std::unique_ptr<Engines::KeplerMemory> kepler_memory;
|
||||
|
||||
std::shared_ptr<MemoryManager> memory_manager;
|
||||
|
||||
std::unique_ptr<DmaPusher> dma_pusher;
|
||||
|
||||
bool initiated{};
|
||||
};
|
||||
|
||||
} // namespace Control
|
||||
|
||||
} // namespace Tegra
|
5
src/video_core/control/channel_state_cache.cpp
Normal file
5
src/video_core/control/channel_state_cache.cpp
Normal file
|
@ -0,0 +1,5 @@
|
|||
#include "video_core/control/channel_state_cache.inc"
|
||||
|
||||
namespace VideoCommon {
|
||||
template class VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo>;
|
||||
}
|
68
src/video_core/control/channel_state_cache.h
Normal file
68
src/video_core/control/channel_state_cache.h
Normal file
|
@ -0,0 +1,68 @@
|
|||
#pragma once
|
||||
|
||||
#include <deque>
|
||||
#include <limits>
|
||||
#include <unordered_map>
|
||||
|
||||
#include "common/common_types.h"
|
||||
|
||||
namespace Tegra {
|
||||
|
||||
namespace Engines {
|
||||
class Maxwell3D;
|
||||
class KeplerCompute;
|
||||
} // namespace Engines
|
||||
|
||||
class MemoryManager;
|
||||
|
||||
namespace Control {
|
||||
struct ChannelState;
|
||||
}
|
||||
|
||||
} // namespace Tegra
|
||||
|
||||
namespace VideoCommon {
|
||||
|
||||
class ChannelInfo {
|
||||
public:
|
||||
ChannelInfo() = delete;
|
||||
ChannelInfo(Tegra::Control::ChannelState& state);
|
||||
ChannelInfo(const ChannelInfo& state) = delete;
|
||||
ChannelInfo& operator=(const ChannelInfo&) = delete;
|
||||
ChannelInfo(ChannelInfo&& other) = default;
|
||||
ChannelInfo& operator=(ChannelInfo&& other) = default;
|
||||
|
||||
Tegra::Engines::Maxwell3D& maxwell3d;
|
||||
Tegra::Engines::KeplerCompute& kepler_compute;
|
||||
Tegra::MemoryManager& gpu_memory;
|
||||
};
|
||||
|
||||
template <class P>
|
||||
class ChannelSetupCaches {
|
||||
public:
|
||||
/// Operations for seting the channel of execution.
|
||||
|
||||
/// Create channel state.
|
||||
void CreateChannel(Tegra::Control::ChannelState& channel);
|
||||
|
||||
/// Bind a channel for execution.
|
||||
void BindToChannel(s32 id);
|
||||
|
||||
/// Erase channel's state.
|
||||
void EraseChannel(s32 id);
|
||||
|
||||
protected:
|
||||
static constexpr size_t UNSET_CHANNEL{std::numeric_limits<size_t>::max()};
|
||||
|
||||
std::deque<P> channel_storage;
|
||||
std::deque<size_t> free_channel_ids;
|
||||
std::unordered_map<s32, size_t> channel_map;
|
||||
|
||||
P* channel_state;
|
||||
size_t current_channel_id{UNSET_CHANNEL};
|
||||
Tegra::Engines::Maxwell3D* maxwell3d;
|
||||
Tegra::Engines::KeplerCompute* kepler_compute;
|
||||
Tegra::MemoryManager* gpu_memory;
|
||||
};
|
||||
|
||||
} // namespace VideoCommon
|
64
src/video_core/control/channel_state_cache.inc
Normal file
64
src/video_core/control/channel_state_cache.inc
Normal file
|
@ -0,0 +1,64 @@
|
|||
#include "video_core/control/channel_state.h"
|
||||
#include "video_core/control/channel_state_cache.h"
|
||||
#include "video_core/engines/kepler_compute.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/memory_manager.h"
|
||||
|
||||
namespace VideoCommon {
|
||||
|
||||
ChannelInfo::ChannelInfo(Tegra::Control::ChannelState& channel_state)
|
||||
: maxwell3d{*channel_state.maxwell_3d}, kepler_compute{*channel_state.kepler_compute},
|
||||
gpu_memory{*channel_state.memory_manager} {}
|
||||
|
||||
template <class P>
|
||||
void ChannelSetupCaches<P>::CreateChannel(struct Tegra::Control::ChannelState& channel) {
|
||||
ASSERT(channel_map.find(channel.bind_id) == channel_map.end() && channel.bind_id >= 0);
|
||||
auto new_id = [this, &channel]() {
|
||||
if (!free_channel_ids.empty()) {
|
||||
auto id = free_channel_ids.front();
|
||||
free_channel_ids.pop_front();
|
||||
new (&channel_storage[id]) ChannelInfo(channel);
|
||||
return id;
|
||||
}
|
||||
channel_storage.emplace_back(channel);
|
||||
return channel_storage.size() - 1;
|
||||
}();
|
||||
channel_map.emplace(channel.bind_id, new_id);
|
||||
if (current_channel_id != UNSET_CHANNEL) {
|
||||
channel_state = &channel_storage[current_channel_id];
|
||||
}
|
||||
}
|
||||
|
||||
/// Bind a channel for execution.
|
||||
template <class P>
|
||||
void ChannelSetupCaches<P>::BindToChannel(s32 id) {
|
||||
auto it = channel_map.find(id);
|
||||
ASSERT(it != channel_map.end() && id >= 0);
|
||||
current_channel_id = it->second;
|
||||
channel_state = &channel_storage[current_channel_id];
|
||||
maxwell3d = &channel_state->maxwell3d;
|
||||
kepler_compute = &channel_state->kepler_compute;
|
||||
gpu_memory = &channel_state->gpu_memory;
|
||||
}
|
||||
|
||||
/// Erase channel's channel_state.
|
||||
template <class P>
|
||||
void ChannelSetupCaches<P>::EraseChannel(s32 id) {
|
||||
const auto it = channel_map.find(id);
|
||||
ASSERT(it != channel_map.end() && id >= 0);
|
||||
const auto this_id = it->second;
|
||||
free_channel_ids.push_back(this_id);
|
||||
channel_map.erase(it);
|
||||
if (this_id == current_channel_id) {
|
||||
current_channel_id = UNSET_CHANNEL;
|
||||
channel_state = nullptr;
|
||||
maxwell3d = nullptr;
|
||||
kepler_compute = nullptr;
|
||||
gpu_memory = nullptr;
|
||||
} else if (current_channel_id != UNSET_CHANNEL) {
|
||||
channel_state = &channel_storage[current_channel_id];
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
} // namespace VideoCommon
|
31
src/video_core/control/scheduler.cpp
Normal file
31
src/video_core/control/scheduler.cpp
Normal file
|
@ -0,0 +1,31 @@
|
|||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <memory>
|
||||
|
||||
#include "video_core/control/channel_state.h"
|
||||
#include "video_core/control/scheduler.h"
|
||||
#include "video_core/gpu.h"
|
||||
|
||||
namespace Tegra::Control {
|
||||
Scheduler::Scheduler(GPU& gpu_) : gpu{gpu_} {}
|
||||
|
||||
Scheduler::~Scheduler() = default;
|
||||
|
||||
void Scheduler::Push(s32 channel, CommandList&& entries) {
|
||||
std::unique_lock<std::mutex> lk(scheduling_guard);
|
||||
auto it = channels.find(channel);
|
||||
auto channel_state = it->second;
|
||||
gpu.BindChannel(channel_state->bind_id);
|
||||
channel_state->dma_pusher->Push(std::move(entries));
|
||||
channel_state->dma_pusher->DispatchCalls();
|
||||
}
|
||||
|
||||
void Scheduler::DeclareChannel(std::shared_ptr<ChannelState> new_channel) {
|
||||
s32 channel = new_channel->bind_id;
|
||||
std::unique_lock<std::mutex> lk(scheduling_guard);
|
||||
channels.emplace(channel, new_channel);
|
||||
}
|
||||
|
||||
} // namespace Tegra::Control
|
38
src/video_core/control/scheduler.h
Normal file
38
src/video_core/control/scheduler.h
Normal file
|
@ -0,0 +1,38 @@
|
|||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
#include <unordered_map>
|
||||
|
||||
#include "video_core/dma_pusher.h"
|
||||
|
||||
namespace Tegra {
|
||||
|
||||
class GPU;
|
||||
|
||||
namespace Control {
|
||||
|
||||
struct ChannelState;
|
||||
|
||||
class Scheduler {
|
||||
public:
|
||||
Scheduler(GPU& gpu_);
|
||||
~Scheduler();
|
||||
|
||||
void Push(s32 channel, CommandList&& entries);
|
||||
|
||||
void DeclareChannel(std::shared_ptr<ChannelState> new_channel);
|
||||
|
||||
private:
|
||||
std::unordered_map<s32, std::shared_ptr<ChannelState>> channels;
|
||||
std::mutex scheduling_guard;
|
||||
GPU& gpu;
|
||||
};
|
||||
|
||||
} // namespace Control
|
||||
|
||||
} // namespace Tegra
|
|
@ -12,7 +12,10 @@
|
|||
|
||||
namespace Tegra {
|
||||
|
||||
DmaPusher::DmaPusher(Core::System& system_, GPU& gpu_) : gpu{gpu_}, system{system_} {}
|
||||
DmaPusher::DmaPusher(Core::System& system_, GPU& gpu_, MemoryManager& memory_manager_,
|
||||
Control::ChannelState& channel_state_)
|
||||
: gpu{gpu_}, system{system_}, memory_manager{memory_manager_}, puller{gpu_, memory_manager_,
|
||||
*this, channel_state_} {}
|
||||
|
||||
DmaPusher::~DmaPusher() = default;
|
||||
|
||||
|
@ -76,11 +79,11 @@ bool DmaPusher::Step() {
|
|||
// Push buffer non-empty, read a word
|
||||
command_headers.resize(command_list_header.size);
|
||||
if (Settings::IsGPULevelHigh()) {
|
||||
gpu.MemoryManager().ReadBlock(dma_get, command_headers.data(),
|
||||
command_list_header.size * sizeof(u32));
|
||||
memory_manager.ReadBlock(dma_get, command_headers.data(),
|
||||
command_list_header.size * sizeof(u32));
|
||||
} else {
|
||||
gpu.MemoryManager().ReadBlockUnsafe(dma_get, command_headers.data(),
|
||||
command_list_header.size * sizeof(u32));
|
||||
memory_manager.ReadBlockUnsafe(dma_get, command_headers.data(),
|
||||
command_list_header.size * sizeof(u32));
|
||||
}
|
||||
}
|
||||
for (std::size_t index = 0; index < command_headers.size();) {
|
||||
|
@ -154,7 +157,7 @@ void DmaPusher::SetState(const CommandHeader& command_header) {
|
|||
|
||||
void DmaPusher::CallMethod(u32 argument) const {
|
||||
if (dma_state.method < non_puller_methods) {
|
||||
gpu.CallMethod(GPU::MethodCall{
|
||||
puller.CallPullerMethod(Engines::Puller::MethodCall{
|
||||
dma_state.method,
|
||||
argument,
|
||||
dma_state.subchannel,
|
||||
|
@ -168,12 +171,16 @@ void DmaPusher::CallMethod(u32 argument) const {
|
|||
|
||||
void DmaPusher::CallMultiMethod(const u32* base_start, u32 num_methods) const {
|
||||
if (dma_state.method < non_puller_methods) {
|
||||
gpu.CallMultiMethod(dma_state.method, dma_state.subchannel, base_start, num_methods,
|
||||
dma_state.method_count);
|
||||
puller.CallMultiMethod(dma_state.method, dma_state.subchannel, base_start, num_methods,
|
||||
dma_state.method_count);
|
||||
} else {
|
||||
subchannels[dma_state.subchannel]->CallMultiMethod(dma_state.method, base_start,
|
||||
num_methods, dma_state.method_count);
|
||||
}
|
||||
}
|
||||
|
||||
void DmaPusher::BindRasterizer(VideoCore::RasterizerInterface* rasterizer) {
|
||||
puller.BindRasterizer(rasterizer);
|
||||
}
|
||||
|
||||
} // namespace Tegra
|
||||
|
|
|
@ -10,6 +10,7 @@
|
|||
#include "common/bit_field.h"
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/engines/engine_interface.h"
|
||||
#include "video_core/engines/puller.h"
|
||||
|
||||
namespace Core {
|
||||
class System;
|
||||
|
@ -17,7 +18,12 @@ class System;
|
|||
|
||||
namespace Tegra {
|
||||
|
||||
namespace Control {
|
||||
struct ChannelState;
|
||||
}
|
||||
|
||||
class GPU;
|
||||
class MemoryManager;
|
||||
|
||||
enum class SubmissionMode : u32 {
|
||||
IncreasingOld = 0,
|
||||
|
@ -102,7 +108,8 @@ struct CommandList final {
|
|||
*/
|
||||
class DmaPusher final {
|
||||
public:
|
||||
explicit DmaPusher(Core::System& system_, GPU& gpu_);
|
||||
explicit DmaPusher(Core::System& system_, GPU& gpu_, MemoryManager& memory_manager_,
|
||||
Control::ChannelState& channel_state_);
|
||||
~DmaPusher();
|
||||
|
||||
void Push(CommandList&& entries) {
|
||||
|
@ -115,6 +122,8 @@ public:
|
|||
subchannels[subchannel_id] = engine;
|
||||
}
|
||||
|
||||
void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);
|
||||
|
||||
private:
|
||||
static constexpr u32 non_puller_methods = 0x40;
|
||||
static constexpr u32 max_subchannels = 8;
|
||||
|
@ -148,6 +157,8 @@ private:
|
|||
|
||||
GPU& gpu;
|
||||
Core::System& system;
|
||||
MemoryManager& memory_manager;
|
||||
mutable Engines::Puller puller;
|
||||
};
|
||||
|
||||
} // namespace Tegra
|
||||
|
|
297
src/video_core/engines/puller.cpp
Normal file
297
src/video_core/engines/puller.cpp
Normal file
|
@ -0,0 +1,297 @@
|
|||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "common/settings.h"
|
||||
#include "core/core.h"
|
||||
#include "video_core/control/channel_state.h"
|
||||
#include "video_core/dma_pusher.h"
|
||||
#include "video_core/engines/fermi_2d.h"
|
||||
#include "video_core/engines/kepler_compute.h"
|
||||
#include "video_core/engines/kepler_memory.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/engines/maxwell_dma.h"
|
||||
#include "video_core/engines/puller.h"
|
||||
#include "video_core/gpu.h"
|
||||
#include "video_core/memory_manager.h"
|
||||
#include "video_core/rasterizer_interface.h"
|
||||
|
||||
namespace Tegra::Engines {
|
||||
|
||||
Puller::Puller(GPU& gpu_, MemoryManager& memory_manager_, DmaPusher& dma_pusher_,
|
||||
Control::ChannelState& channel_state_)
|
||||
: gpu{gpu_}, memory_manager{memory_manager_}, dma_pusher{dma_pusher_}, channel_state{
|
||||
channel_state_} {}
|
||||
|
||||
Puller::~Puller() = default;
|
||||
|
||||
void Puller::ProcessBindMethod(const MethodCall& method_call) {
|
||||
// Bind the current subchannel to the desired engine id.
|
||||
LOG_DEBUG(HW_GPU, "Binding subchannel {} to engine {}", method_call.subchannel,
|
||||
method_call.argument);
|
||||
const auto engine_id = static_cast<EngineID>(method_call.argument);
|
||||
bound_engines[method_call.subchannel] = static_cast<EngineID>(engine_id);
|
||||
switch (engine_id) {
|
||||
case EngineID::FERMI_TWOD_A:
|
||||
dma_pusher.BindSubchannel(channel_state.fermi_2d.get(), method_call.subchannel);
|
||||
break;
|
||||
case EngineID::MAXWELL_B:
|
||||
dma_pusher.BindSubchannel(channel_state.maxwell_3d.get(), method_call.subchannel);
|
||||
break;
|
||||
case EngineID::KEPLER_COMPUTE_B:
|
||||
dma_pusher.BindSubchannel(channel_state.kepler_compute.get(), method_call.subchannel);
|
||||
break;
|
||||
case EngineID::MAXWELL_DMA_COPY_A:
|
||||
dma_pusher.BindSubchannel(channel_state.maxwell_dma.get(), method_call.subchannel);
|
||||
break;
|
||||
case EngineID::KEPLER_INLINE_TO_MEMORY_B:
|
||||
dma_pusher.BindSubchannel(channel_state.kepler_memory.get(), method_call.subchannel);
|
||||
break;
|
||||
default:
|
||||
UNIMPLEMENTED_MSG("Unimplemented engine {:04X}", engine_id);
|
||||
}
|
||||
}
|
||||
|
||||
void Puller::ProcessFenceActionMethod() {
|
||||
switch (regs.fence_action.op) {
|
||||
case Puller::FenceOperation::Acquire:
|
||||
// UNIMPLEMENTED_MSG("Channel Scheduling pending.");
|
||||
// WaitFence(regs.fence_action.syncpoint_id, regs.fence_value);
|
||||
break;
|
||||
case Puller::FenceOperation::Increment:
|
||||
rasterizer->SignalSyncPoint(regs.fence_action.syncpoint_id);
|
||||
break;
|
||||
default:
|
||||
UNIMPLEMENTED_MSG("Unimplemented operation {}", regs.fence_action.op.Value());
|
||||
}
|
||||
}
|
||||
|
||||
void Puller::ProcessWaitForInterruptMethod() {
|
||||
// TODO(bunnei) ImplementMe
|
||||
LOG_WARNING(HW_GPU, "(STUBBED) called");
|
||||
}
|
||||
|
||||
void Puller::ProcessSemaphoreTriggerMethod() {
|
||||
const auto semaphoreOperationMask = 0xF;
|
||||
const auto op =
|
||||
static_cast<GpuSemaphoreOperation>(regs.semaphore_trigger & semaphoreOperationMask);
|
||||
if (op == GpuSemaphoreOperation::WriteLong) {
|
||||
struct Block {
|
||||
u32 sequence;
|
||||
u32 zeros = 0;
|
||||
u64 timestamp;
|
||||
};
|
||||
|
||||
Block block{};
|
||||
block.sequence = regs.semaphore_sequence;
|
||||
// TODO(Kmather73): Generate a real GPU timestamp and write it here instead of
|
||||
// CoreTiming
|
||||
block.timestamp = gpu.GetTicks();
|
||||
memory_manager.WriteBlock(regs.semaphore_address.SemaphoreAddress(), &block, sizeof(block));
|
||||
} else {
|
||||
const u32 word{memory_manager.Read<u32>(regs.semaphore_address.SemaphoreAddress())};
|
||||
if ((op == GpuSemaphoreOperation::AcquireEqual && word == regs.semaphore_sequence) ||
|
||||
(op == GpuSemaphoreOperation::AcquireGequal &&
|
||||
static_cast<s32>(word - regs.semaphore_sequence) > 0) ||
|
||||
(op == GpuSemaphoreOperation::AcquireMask && (word & regs.semaphore_sequence))) {
|
||||
// Nothing to do in this case
|
||||
} else {
|
||||
regs.acquire_source = true;
|
||||
regs.acquire_value = regs.semaphore_sequence;
|
||||
if (op == GpuSemaphoreOperation::AcquireEqual) {
|
||||
regs.acquire_active = true;
|
||||
regs.acquire_mode = false;
|
||||
} else if (op == GpuSemaphoreOperation::AcquireGequal) {
|
||||
regs.acquire_active = true;
|
||||
regs.acquire_mode = true;
|
||||
} else if (op == GpuSemaphoreOperation::AcquireMask) {
|
||||
// TODO(kemathe) The acquire mask operation waits for a value that, ANDed with
|
||||
// semaphore_sequence, gives a non-0 result
|
||||
LOG_ERROR(HW_GPU, "Invalid semaphore operation AcquireMask not implemented");
|
||||
} else {
|
||||
LOG_ERROR(HW_GPU, "Invalid semaphore operation");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void Puller::ProcessSemaphoreRelease() {
|
||||
memory_manager.Write<u32>(regs.semaphore_address.SemaphoreAddress(), regs.semaphore_release);
|
||||
}
|
||||
|
||||
void Puller::ProcessSemaphoreAcquire() {
|
||||
const u32 word = memory_manager.Read<u32>(regs.semaphore_address.SemaphoreAddress());
|
||||
const auto value = regs.semaphore_acquire;
|
||||
if (word != value) {
|
||||
regs.acquire_active = true;
|
||||
regs.acquire_value = value;
|
||||
// TODO(kemathe73) figure out how to do the acquire_timeout
|
||||
regs.acquire_mode = false;
|
||||
regs.acquire_source = false;
|
||||
}
|
||||
}
|
||||
|
||||
/// Calls a GPU puller method.
|
||||
void Puller::CallPullerMethod(const MethodCall& method_call) {
|
||||
regs.reg_array[method_call.method] = method_call.argument;
|
||||
const auto method = static_cast<BufferMethods>(method_call.method);
|
||||
|
||||
switch (method) {
|
||||
case BufferMethods::BindObject: {
|
||||
ProcessBindMethod(method_call);
|
||||
break;
|
||||
}
|
||||
case BufferMethods::Nop:
|
||||
case BufferMethods::SemaphoreAddressHigh:
|
||||
case BufferMethods::SemaphoreAddressLow:
|
||||
case BufferMethods::SemaphoreSequence:
|
||||
case BufferMethods::UnkCacheFlush:
|
||||
case BufferMethods::WrcacheFlush:
|
||||
case BufferMethods::FenceValue:
|
||||
break;
|
||||
case BufferMethods::RefCnt:
|
||||
rasterizer->SignalReference();
|
||||
break;
|
||||
case BufferMethods::FenceAction:
|
||||
ProcessFenceActionMethod();
|
||||
break;
|
||||
case BufferMethods::WaitForInterrupt:
|
||||
ProcessWaitForInterruptMethod();
|
||||
break;
|
||||
case BufferMethods::SemaphoreTrigger: {
|
||||
ProcessSemaphoreTriggerMethod();
|
||||
break;
|
||||
}
|
||||
case BufferMethods::NotifyIntr: {
|
||||
// TODO(Kmather73): Research and implement this method.
|
||||
LOG_ERROR(HW_GPU, "Special puller engine method NotifyIntr not implemented");
|
||||
break;
|
||||
}
|
||||
case BufferMethods::Unk28: {
|
||||
// TODO(Kmather73): Research and implement this method.
|
||||
LOG_ERROR(HW_GPU, "Special puller engine method Unk28 not implemented");
|
||||
break;
|
||||
}
|
||||
case BufferMethods::SemaphoreAcquire: {
|
||||
ProcessSemaphoreAcquire();
|
||||
break;
|
||||
}
|
||||
case BufferMethods::SemaphoreRelease: {
|
||||
ProcessSemaphoreRelease();
|
||||
break;
|
||||
}
|
||||
case BufferMethods::Yield: {
|
||||
// TODO(Kmather73): Research and implement this method.
|
||||
LOG_ERROR(HW_GPU, "Special puller engine method Yield not implemented");
|
||||
break;
|
||||
}
|
||||
default:
|
||||
LOG_ERROR(HW_GPU, "Special puller engine method {:X} not implemented", method);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/// Calls a GPU engine method.
|
||||
void Puller::CallEngineMethod(const MethodCall& method_call) {
|
||||
const EngineID engine = bound_engines[method_call.subchannel];
|
||||
|
||||
switch (engine) {
|
||||
case EngineID::FERMI_TWOD_A:
|
||||
channel_state.fermi_2d->CallMethod(method_call.method, method_call.argument,
|
||||
method_call.IsLastCall());
|
||||
break;
|
||||
case EngineID::MAXWELL_B:
|
||||
channel_state.maxwell_3d->CallMethod(method_call.method, method_call.argument,
|
||||
method_call.IsLastCall());
|
||||
break;
|
||||
case EngineID::KEPLER_COMPUTE_B:
|
||||
channel_state.kepler_compute->CallMethod(method_call.method, method_call.argument,
|
||||
method_call.IsLastCall());
|
||||
break;
|
||||
case EngineID::MAXWELL_DMA_COPY_A:
|
||||
channel_state.maxwell_dma->CallMethod(method_call.method, method_call.argument,
|
||||
method_call.IsLastCall());
|
||||
break;
|
||||
case EngineID::KEPLER_INLINE_TO_MEMORY_B:
|
||||
channel_state.kepler_memory->CallMethod(method_call.method, method_call.argument,
|
||||
method_call.IsLastCall());
|
||||
break;
|
||||
default:
|
||||
UNIMPLEMENTED_MSG("Unimplemented engine");
|
||||
}
|
||||
}
|
||||
|
||||
/// Calls a GPU engine multivalue method.
|
||||
void Puller::CallEngineMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
|
||||
u32 methods_pending) {
|
||||
const EngineID engine = bound_engines[subchannel];
|
||||
|
||||
switch (engine) {
|
||||
case EngineID::FERMI_TWOD_A:
|
||||
channel_state.fermi_2d->CallMultiMethod(method, base_start, amount, methods_pending);
|
||||
break;
|
||||
case EngineID::MAXWELL_B:
|
||||
channel_state.maxwell_3d->CallMultiMethod(method, base_start, amount, methods_pending);
|
||||
break;
|
||||
case EngineID::KEPLER_COMPUTE_B:
|
||||
channel_state.kepler_compute->CallMultiMethod(method, base_start, amount, methods_pending);
|
||||
break;
|
||||
case EngineID::MAXWELL_DMA_COPY_A:
|
||||
channel_state.maxwell_dma->CallMultiMethod(method, base_start, amount, methods_pending);
|
||||
break;
|
||||
case EngineID::KEPLER_INLINE_TO_MEMORY_B:
|
||||
channel_state.kepler_memory->CallMultiMethod(method, base_start, amount, methods_pending);
|
||||
break;
|
||||
default:
|
||||
UNIMPLEMENTED_MSG("Unimplemented engine");
|
||||
}
|
||||
}
|
||||
|
||||
/// Calls a GPU method.
|
||||
void Puller::CallMethod(const MethodCall& method_call) {
|
||||
LOG_TRACE(HW_GPU, "Processing method {:08X} on subchannel {}", method_call.method,
|
||||
method_call.subchannel);
|
||||
|
||||
ASSERT(method_call.subchannel < bound_engines.size());
|
||||
|
||||
if (ExecuteMethodOnEngine(method_call.method)) {
|
||||
CallEngineMethod(method_call);
|
||||
} else {
|
||||
CallPullerMethod(method_call);
|
||||
}
|
||||
}
|
||||
|
||||
/// Calls a GPU multivalue method.
|
||||
void Puller::CallMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
|
||||
u32 methods_pending) {
|
||||
LOG_TRACE(HW_GPU, "Processing method {:08X} on subchannel {}", method, subchannel);
|
||||
|
||||
ASSERT(subchannel < bound_engines.size());
|
||||
|
||||
if (ExecuteMethodOnEngine(method)) {
|
||||
CallEngineMultiMethod(method, subchannel, base_start, amount, methods_pending);
|
||||
} else {
|
||||
for (std::size_t i = 0; i < amount; i++) {
|
||||
CallPullerMethod(MethodCall{
|
||||
method,
|
||||
base_start[i],
|
||||
subchannel,
|
||||
methods_pending - static_cast<u32>(i),
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void Puller::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) {
|
||||
rasterizer = rasterizer_;
|
||||
}
|
||||
|
||||
/// Determines where the method should be executed.
|
||||
[[nodiscard]] bool Puller::ExecuteMethodOnEngine(u32 method) {
|
||||
const auto buffer_method = static_cast<BufferMethods>(method);
|
||||
return buffer_method >= BufferMethods::NonPullerMethods;
|
||||
}
|
||||
|
||||
} // namespace Tegra::Engines
|
179
src/video_core/engines/puller.h
Normal file
179
src/video_core/engines/puller.h
Normal file
|
@ -0,0 +1,179 @@
|
|||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <cstddef>
|
||||
#include <vector>
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_funcs.h"
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/engines/engine_interface.h"
|
||||
|
||||
namespace Core {
|
||||
class System;
|
||||
}
|
||||
|
||||
namespace Tegra {
|
||||
class MemoryManager;
|
||||
class DmaPusher;
|
||||
|
||||
enum class EngineID {
|
||||
FERMI_TWOD_A = 0x902D, // 2D Engine
|
||||
MAXWELL_B = 0xB197, // 3D Engine
|
||||
KEPLER_COMPUTE_B = 0xB1C0,
|
||||
KEPLER_INLINE_TO_MEMORY_B = 0xA140,
|
||||
MAXWELL_DMA_COPY_A = 0xB0B5,
|
||||
};
|
||||
|
||||
namespace Control {
|
||||
struct ChannelState;
|
||||
}
|
||||
} // namespace Tegra
|
||||
|
||||
namespace VideoCore {
|
||||
class RasterizerInterface;
|
||||
}
|
||||
|
||||
namespace Tegra::Engines {
|
||||
|
||||
class Puller final {
|
||||
public:
|
||||
struct MethodCall {
|
||||
u32 method{};
|
||||
u32 argument{};
|
||||
u32 subchannel{};
|
||||
u32 method_count{};
|
||||
|
||||
explicit MethodCall(u32 method_, u32 argument_, u32 subchannel_ = 0, u32 method_count_ = 0)
|
||||
: method(method_), argument(argument_), subchannel(subchannel_),
|
||||
method_count(method_count_) {}
|
||||
|
||||
[[nodiscard]] bool IsLastCall() const {
|
||||
return method_count <= 1;
|
||||
}
|
||||
};
|
||||
|
||||
enum class FenceOperation : u32 {
|
||||
Acquire = 0,
|
||||
Increment = 1,
|
||||
};
|
||||
|
||||
union FenceAction {
|
||||
u32 raw;
|
||||
BitField<0, 1, FenceOperation> op;
|
||||
BitField<8, 24, u32> syncpoint_id;
|
||||
};
|
||||
|
||||
explicit Puller(GPU& gpu_, MemoryManager& memory_manager_, DmaPusher& dma_pusher,
|
||||
Control::ChannelState& channel_state);
|
||||
~Puller();
|
||||
|
||||
void CallMethod(const MethodCall& method_call);
|
||||
|
||||
void CallMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
|
||||
u32 methods_pending);
|
||||
|
||||
void BindRasterizer(VideoCore::RasterizerInterface* rasterizer);
|
||||
|
||||
void CallPullerMethod(const MethodCall& method_call);
|
||||
|
||||
void CallEngineMethod(const MethodCall& method_call);
|
||||
|
||||
void CallEngineMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
|
||||
u32 methods_pending);
|
||||
|
||||
private:
|
||||
Tegra::GPU& gpu;
|
||||
|
||||
MemoryManager& memory_manager;
|
||||
DmaPusher& dma_pusher;
|
||||
Control::ChannelState& channel_state;
|
||||
VideoCore::RasterizerInterface* rasterizer = nullptr;
|
||||
|
||||
static constexpr std::size_t NUM_REGS = 0x800;
|
||||
struct Regs {
|
||||
static constexpr size_t NUM_REGS = 0x40;
|
||||
|
||||
union {
|
||||
struct {
|
||||
INSERT_PADDING_WORDS_NOINIT(0x4);
|
||||
struct {
|
||||
u32 address_high;
|
||||
u32 address_low;
|
||||
|
||||
[[nodiscard]] GPUVAddr SemaphoreAddress() const {
|
||||
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
|
||||
address_low);
|
||||
}
|
||||
} semaphore_address;
|
||||
|
||||
u32 semaphore_sequence;
|
||||
u32 semaphore_trigger;
|
||||
INSERT_PADDING_WORDS_NOINIT(0xC);
|
||||
|
||||
// The pusher and the puller share the reference counter, the pusher only has read
|
||||
// access
|
||||
u32 reference_count;
|
||||
INSERT_PADDING_WORDS_NOINIT(0x5);
|
||||
|
||||
u32 semaphore_acquire;
|
||||
u32 semaphore_release;
|
||||
u32 fence_value;
|
||||
FenceAction fence_action;
|
||||
INSERT_PADDING_WORDS_NOINIT(0xE2);
|
||||
|
||||
// Puller state
|
||||
u32 acquire_mode;
|
||||
u32 acquire_source;
|
||||
u32 acquire_active;
|
||||
u32 acquire_timeout;
|
||||
u32 acquire_value;
|
||||
};
|
||||
std::array<u32, NUM_REGS> reg_array;
|
||||
};
|
||||
} regs{};
|
||||
|
||||
void ProcessBindMethod(const MethodCall& method_call);
|
||||
void ProcessFenceActionMethod();
|
||||
void ProcessSemaphoreAcquire();
|
||||
void ProcessSemaphoreRelease();
|
||||
void ProcessSemaphoreTriggerMethod();
|
||||
void ProcessWaitForInterruptMethod();
|
||||
[[nodiscard]] bool ExecuteMethodOnEngine(u32 method);
|
||||
|
||||
/// Mapping of command subchannels to their bound engine ids
|
||||
std::array<EngineID, 8> bound_engines{};
|
||||
|
||||
enum class GpuSemaphoreOperation {
|
||||
AcquireEqual = 0x1,
|
||||
WriteLong = 0x2,
|
||||
AcquireGequal = 0x4,
|
||||
AcquireMask = 0x8,
|
||||
};
|
||||
|
||||
#define ASSERT_REG_POSITION(field_name, position) \
|
||||
static_assert(offsetof(Regs, field_name) == position * 4, \
|
||||
"Field " #field_name " has invalid position")
|
||||
|
||||
ASSERT_REG_POSITION(semaphore_address, 0x4);
|
||||
ASSERT_REG_POSITION(semaphore_sequence, 0x6);
|
||||
ASSERT_REG_POSITION(semaphore_trigger, 0x7);
|
||||
ASSERT_REG_POSITION(reference_count, 0x14);
|
||||
ASSERT_REG_POSITION(semaphore_acquire, 0x1A);
|
||||
ASSERT_REG_POSITION(semaphore_release, 0x1B);
|
||||
ASSERT_REG_POSITION(fence_value, 0x1C);
|
||||
ASSERT_REG_POSITION(fence_action, 0x1D);
|
||||
|
||||
ASSERT_REG_POSITION(acquire_mode, 0x100);
|
||||
ASSERT_REG_POSITION(acquire_source, 0x101);
|
||||
ASSERT_REG_POSITION(acquire_active, 0x102);
|
||||
ASSERT_REG_POSITION(acquire_timeout, 0x103);
|
||||
ASSERT_REG_POSITION(acquire_value, 0x104);
|
||||
|
||||
#undef ASSERT_REG_POSITION
|
||||
};
|
||||
|
||||
} // namespace Tegra::Engines
|
|
@ -4,12 +4,13 @@
|
|||
#pragma once
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstring>
|
||||
#include <memory>
|
||||
#include <queue>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/delayed_destruction_ring.h"
|
||||
#include "video_core/gpu.h"
|
||||
#include "video_core/memory_manager.h"
|
||||
#include "video_core/rasterizer_interface.h"
|
||||
|
||||
namespace VideoCommon {
|
||||
|
@ -19,10 +20,10 @@ public:
|
|||
explicit FenceBase(u32 payload_, bool is_stubbed_)
|
||||
: address{}, payload{payload_}, is_semaphore{false}, is_stubbed{is_stubbed_} {}
|
||||
|
||||
explicit FenceBase(GPUVAddr address_, u32 payload_, bool is_stubbed_)
|
||||
explicit FenceBase(u8* address_, u32 payload_, bool is_stubbed_)
|
||||
: address{address_}, payload{payload_}, is_semaphore{true}, is_stubbed{is_stubbed_} {}
|
||||
|
||||
GPUVAddr GetAddress() const {
|
||||
u8* GetAddress() const {
|
||||
return address;
|
||||
}
|
||||
|
||||
|
@ -35,7 +36,7 @@ public:
|
|||
}
|
||||
|
||||
private:
|
||||
GPUVAddr address;
|
||||
u8* address;
|
||||
u32 payload;
|
||||
bool is_semaphore;
|
||||
|
||||
|
@ -57,7 +58,7 @@ public:
|
|||
buffer_cache.AccumulateFlushes();
|
||||
}
|
||||
|
||||
void SignalSemaphore(GPUVAddr addr, u32 value) {
|
||||
void SignalSemaphore(u8* addr, u32 value) {
|
||||
TryReleasePendingFences();
|
||||
const bool should_flush = ShouldFlush();
|
||||
CommitAsyncFlushes();
|
||||
|
@ -91,8 +92,9 @@ public:
|
|||
}
|
||||
PopAsyncFlushes();
|
||||
if (current_fence->IsSemaphore()) {
|
||||
gpu_memory.template Write<u32>(current_fence->GetAddress(),
|
||||
current_fence->GetPayload());
|
||||
char* address = reinterpret_cast<char*>(current_fence->GetAddress());
|
||||
auto payload = current_fence->GetPayload();
|
||||
std::memcpy(address, &payload, sizeof(payload));
|
||||
} else {
|
||||
gpu.IncrementSyncPoint(current_fence->GetPayload());
|
||||
}
|
||||
|
@ -104,8 +106,8 @@ protected:
|
|||
explicit FenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::GPU& gpu_,
|
||||
TTextureCache& texture_cache_, TTBufferCache& buffer_cache_,
|
||||
TQueryCache& query_cache_)
|
||||
: rasterizer{rasterizer_}, gpu{gpu_}, gpu_memory{gpu.MemoryManager()},
|
||||
texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, query_cache{query_cache_} {}
|
||||
: rasterizer{rasterizer_}, gpu{gpu_}, texture_cache{texture_cache_},
|
||||
buffer_cache{buffer_cache_}, query_cache{query_cache_} {}
|
||||
|
||||
virtual ~FenceManager() = default;
|
||||
|
||||
|
@ -113,7 +115,7 @@ protected:
|
|||
/// true
|
||||
virtual TFence CreateFence(u32 value, bool is_stubbed) = 0;
|
||||
/// Creates a Semaphore Fence Interface, does not create a backend fence if 'is_stubbed' is true
|
||||
virtual TFence CreateFence(GPUVAddr addr, u32 value, bool is_stubbed) = 0;
|
||||
virtual TFence CreateFence(u8* addr, u32 value, bool is_stubbed) = 0;
|
||||
/// Queues a fence into the backend if the fence isn't stubbed.
|
||||
virtual void QueueFence(TFence& fence) = 0;
|
||||
/// Notifies that the backend fence has been signaled/reached in host GPU.
|
||||
|
@ -123,7 +125,6 @@ protected:
|
|||
|
||||
VideoCore::RasterizerInterface& rasterizer;
|
||||
Tegra::GPU& gpu;
|
||||
Tegra::MemoryManager& gpu_memory;
|
||||
TTextureCache& texture_cache;
|
||||
TTBufferCache& buffer_cache;
|
||||
TQueryCache& query_cache;
|
||||
|
@ -137,8 +138,9 @@ private:
|
|||
}
|
||||
PopAsyncFlushes();
|
||||
if (current_fence->IsSemaphore()) {
|
||||
gpu_memory.template Write<u32>(current_fence->GetAddress(),
|
||||
current_fence->GetPayload());
|
||||
char* address = reinterpret_cast<char*>(current_fence->GetAddress());
|
||||
const auto payload = current_fence->GetPayload();
|
||||
std::memcpy(address, &payload, sizeof(payload));
|
||||
} else {
|
||||
gpu.IncrementSyncPoint(current_fence->GetPayload());
|
||||
}
|
||||
|
|
|
@ -18,6 +18,8 @@
|
|||
#include "core/hle/service/nvdrv/nvdata.h"
|
||||
#include "core/perf_stats.h"
|
||||
#include "video_core/cdma_pusher.h"
|
||||
#include "video_core/control/channel_state.h"
|
||||
#include "video_core/control/scheduler.h"
|
||||
#include "video_core/dma_pusher.h"
|
||||
#include "video_core/engines/fermi_2d.h"
|
||||
#include "video_core/engines/kepler_compute.h"
|
||||
|
@ -36,65 +38,58 @@ MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192));
|
|||
|
||||
struct GPU::Impl {
|
||||
explicit Impl(GPU& gpu_, Core::System& system_, bool is_async_, bool use_nvdec_)
|
||||
: gpu{gpu_}, system{system_}, memory_manager{std::make_unique<Tegra::MemoryManager>(
|
||||
system)},
|
||||
dma_pusher{std::make_unique<Tegra::DmaPusher>(system, gpu)}, use_nvdec{use_nvdec_},
|
||||
maxwell_3d{std::make_unique<Engines::Maxwell3D>(system, *memory_manager)},
|
||||
fermi_2d{std::make_unique<Engines::Fermi2D>()},
|
||||
kepler_compute{std::make_unique<Engines::KeplerCompute>(system, *memory_manager)},
|
||||
maxwell_dma{std::make_unique<Engines::MaxwellDMA>(system, *memory_manager)},
|
||||
kepler_memory{std::make_unique<Engines::KeplerMemory>(system, *memory_manager)},
|
||||
: gpu{gpu_}, system{system_}, use_nvdec{use_nvdec_},
|
||||
shader_notify{std::make_unique<VideoCore::ShaderNotify>()}, is_async{is_async_},
|
||||
gpu_thread{system_, is_async_} {}
|
||||
gpu_thread{system_, is_async_}, scheduler{std::make_unique<Control::Scheduler>(gpu)} {}
|
||||
|
||||
~Impl() = default;
|
||||
|
||||
std::shared_ptr<Control::ChannelState> CreateChannel(s32 channel_id) {
|
||||
auto channel_state = std::make_shared<Tegra::Control::ChannelState>(channel_id);
|
||||
channels.emplace(channel_id, channel_state);
|
||||
scheduler->DeclareChannel(channel_state);
|
||||
return channel_state;
|
||||
}
|
||||
|
||||
void BindChannel(s32 channel_id) {
|
||||
if (bound_channel == channel_id) {
|
||||
return;
|
||||
}
|
||||
auto it = channels.find(channel_id);
|
||||
ASSERT(it != channels.end());
|
||||
bound_channel = channel_id;
|
||||
current_channel = it->second.get();
|
||||
|
||||
rasterizer->BindChannel(*current_channel);
|
||||
}
|
||||
|
||||
std::shared_ptr<Control::ChannelState> AllocateChannel() {
|
||||
return CreateChannel(new_channel_id++);
|
||||
}
|
||||
|
||||
void InitChannel(Control::ChannelState& to_init) {
|
||||
to_init.Init(system, gpu);
|
||||
to_init.BindRasterizer(rasterizer);
|
||||
rasterizer->InitializeChannel(to_init);
|
||||
}
|
||||
|
||||
void ReleaseChannel(Control::ChannelState& to_release) {
|
||||
UNIMPLEMENTED();
|
||||
}
|
||||
|
||||
void CreateHost1xChannel() {
|
||||
if (host1x_channel) {
|
||||
return;
|
||||
}
|
||||
host1x_channel = CreateChannel(0);
|
||||
host1x_channel->memory_manager = std::make_shared<Tegra::MemoryManager>(system);
|
||||
InitChannel(*host1x_channel);
|
||||
}
|
||||
|
||||
/// Binds a renderer to the GPU.
|
||||
void BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer_) {
|
||||
renderer = std::move(renderer_);
|
||||
rasterizer = renderer->ReadRasterizer();
|
||||
|
||||
memory_manager->BindRasterizer(rasterizer);
|
||||
maxwell_3d->BindRasterizer(rasterizer);
|
||||
fermi_2d->BindRasterizer(rasterizer);
|
||||
kepler_compute->BindRasterizer(rasterizer);
|
||||
kepler_memory->BindRasterizer(rasterizer);
|
||||
maxwell_dma->BindRasterizer(rasterizer);
|
||||
}
|
||||
|
||||
/// Calls a GPU method.
|
||||
void CallMethod(const GPU::MethodCall& method_call) {
|
||||
LOG_TRACE(HW_GPU, "Processing method {:08X} on subchannel {}", method_call.method,
|
||||
method_call.subchannel);
|
||||
|
||||
ASSERT(method_call.subchannel < bound_engines.size());
|
||||
|
||||
if (ExecuteMethodOnEngine(method_call.method)) {
|
||||
CallEngineMethod(method_call);
|
||||
} else {
|
||||
CallPullerMethod(method_call);
|
||||
}
|
||||
}
|
||||
|
||||
/// Calls a GPU multivalue method.
|
||||
void CallMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
|
||||
u32 methods_pending) {
|
||||
LOG_TRACE(HW_GPU, "Processing method {:08X} on subchannel {}", method, subchannel);
|
||||
|
||||
ASSERT(subchannel < bound_engines.size());
|
||||
|
||||
if (ExecuteMethodOnEngine(method)) {
|
||||
CallEngineMultiMethod(method, subchannel, base_start, amount, methods_pending);
|
||||
} else {
|
||||
for (std::size_t i = 0; i < amount; i++) {
|
||||
CallPullerMethod(GPU::MethodCall{
|
||||
method,
|
||||
base_start[i],
|
||||
subchannel,
|
||||
methods_pending - static_cast<u32>(i),
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Flush all current written commands into the host GPU for execution.
|
||||
|
@ -146,42 +141,44 @@ struct GPU::Impl {
|
|||
|
||||
/// Returns a reference to the Maxwell3D GPU engine.
|
||||
[[nodiscard]] Engines::Maxwell3D& Maxwell3D() {
|
||||
return *maxwell_3d;
|
||||
ASSERT(current_channel);
|
||||
return *current_channel->maxwell_3d;
|
||||
}
|
||||
|
||||
/// Returns a const reference to the Maxwell3D GPU engine.
|
||||
[[nodiscard]] const Engines::Maxwell3D& Maxwell3D() const {
|
||||
return *maxwell_3d;
|
||||
ASSERT(current_channel);
|
||||
return *current_channel->maxwell_3d;
|
||||
}
|
||||
|
||||
/// Returns a reference to the KeplerCompute GPU engine.
|
||||
[[nodiscard]] Engines::KeplerCompute& KeplerCompute() {
|
||||
return *kepler_compute;
|
||||
ASSERT(current_channel);
|
||||
return *current_channel->kepler_compute;
|
||||
}
|
||||
|
||||
/// Returns a reference to the KeplerCompute GPU engine.
|
||||
[[nodiscard]] const Engines::KeplerCompute& KeplerCompute() const {
|
||||
return *kepler_compute;
|
||||
ASSERT(current_channel);
|
||||
return *current_channel->kepler_compute;
|
||||
}
|
||||
|
||||
/// Returns a reference to the GPU memory manager.
|
||||
[[nodiscard]] Tegra::MemoryManager& MemoryManager() {
|
||||
return *memory_manager;
|
||||
}
|
||||
|
||||
/// Returns a const reference to the GPU memory manager.
|
||||
[[nodiscard]] const Tegra::MemoryManager& MemoryManager() const {
|
||||
return *memory_manager;
|
||||
CreateHost1xChannel();
|
||||
return *host1x_channel->memory_manager;
|
||||
}
|
||||
|
||||
/// Returns a reference to the GPU DMA pusher.
|
||||
[[nodiscard]] Tegra::DmaPusher& DmaPusher() {
|
||||
return *dma_pusher;
|
||||
ASSERT(current_channel);
|
||||
return *current_channel->dma_pusher;
|
||||
}
|
||||
|
||||
/// Returns a const reference to the GPU DMA pusher.
|
||||
[[nodiscard]] const Tegra::DmaPusher& DmaPusher() const {
|
||||
return *dma_pusher;
|
||||
ASSERT(current_channel);
|
||||
return *current_channel->dma_pusher;
|
||||
}
|
||||
|
||||
/// Returns a reference to the underlying renderer.
|
||||
|
@ -306,7 +303,7 @@ struct GPU::Impl {
|
|||
/// This can be used to launch any necessary threads and register any necessary
|
||||
/// core timing events.
|
||||
void Start() {
|
||||
gpu_thread.StartThread(*renderer, renderer->Context(), *dma_pusher);
|
||||
gpu_thread.StartThread(*renderer, renderer->Context(), *scheduler);
|
||||
cpu_context = renderer->GetRenderWindow().CreateSharedContext();
|
||||
cpu_context->MakeCurrent();
|
||||
}
|
||||
|
@ -328,8 +325,8 @@ struct GPU::Impl {
|
|||
}
|
||||
|
||||
/// Push GPU command entries to be processed
|
||||
void PushGPUEntries(Tegra::CommandList&& entries) {
|
||||
gpu_thread.SubmitList(std::move(entries));
|
||||
void PushGPUEntries(s32 channel, Tegra::CommandList&& entries) {
|
||||
gpu_thread.SubmitList(channel, std::move(entries));
|
||||
}
|
||||
|
||||
/// Push GPU command buffer entries to be processed
|
||||
|
@ -381,303 +378,16 @@ struct GPU::Impl {
|
|||
interrupt_manager.GPUInterruptSyncpt(syncpoint_id, value);
|
||||
}
|
||||
|
||||
void ProcessBindMethod(const GPU::MethodCall& method_call) {
|
||||
// Bind the current subchannel to the desired engine id.
|
||||
LOG_DEBUG(HW_GPU, "Binding subchannel {} to engine {}", method_call.subchannel,
|
||||
method_call.argument);
|
||||
const auto engine_id = static_cast<EngineID>(method_call.argument);
|
||||
bound_engines[method_call.subchannel] = static_cast<EngineID>(engine_id);
|
||||
switch (engine_id) {
|
||||
case EngineID::FERMI_TWOD_A:
|
||||
dma_pusher->BindSubchannel(fermi_2d.get(), method_call.subchannel);
|
||||
break;
|
||||
case EngineID::MAXWELL_B:
|
||||
dma_pusher->BindSubchannel(maxwell_3d.get(), method_call.subchannel);
|
||||
break;
|
||||
case EngineID::KEPLER_COMPUTE_B:
|
||||
dma_pusher->BindSubchannel(kepler_compute.get(), method_call.subchannel);
|
||||
break;
|
||||
case EngineID::MAXWELL_DMA_COPY_A:
|
||||
dma_pusher->BindSubchannel(maxwell_dma.get(), method_call.subchannel);
|
||||
break;
|
||||
case EngineID::KEPLER_INLINE_TO_MEMORY_B:
|
||||
dma_pusher->BindSubchannel(kepler_memory.get(), method_call.subchannel);
|
||||
break;
|
||||
default:
|
||||
UNIMPLEMENTED_MSG("Unimplemented engine {:04X}", engine_id);
|
||||
}
|
||||
}
|
||||
|
||||
void ProcessFenceActionMethod() {
|
||||
switch (regs.fence_action.op) {
|
||||
case GPU::FenceOperation::Acquire:
|
||||
WaitFence(regs.fence_action.syncpoint_id, regs.fence_value);
|
||||
break;
|
||||
case GPU::FenceOperation::Increment:
|
||||
IncrementSyncPoint(regs.fence_action.syncpoint_id);
|
||||
break;
|
||||
default:
|
||||
UNIMPLEMENTED_MSG("Unimplemented operation {}", regs.fence_action.op.Value());
|
||||
}
|
||||
}
|
||||
|
||||
void ProcessWaitForInterruptMethod() {
|
||||
// TODO(bunnei) ImplementMe
|
||||
LOG_WARNING(HW_GPU, "(STUBBED) called");
|
||||
}
|
||||
|
||||
void ProcessSemaphoreTriggerMethod() {
|
||||
const auto semaphoreOperationMask = 0xF;
|
||||
const auto op =
|
||||
static_cast<GpuSemaphoreOperation>(regs.semaphore_trigger & semaphoreOperationMask);
|
||||
if (op == GpuSemaphoreOperation::WriteLong) {
|
||||
struct Block {
|
||||
u32 sequence;
|
||||
u32 zeros = 0;
|
||||
u64 timestamp;
|
||||
};
|
||||
|
||||
Block block{};
|
||||
block.sequence = regs.semaphore_sequence;
|
||||
// TODO(Kmather73): Generate a real GPU timestamp and write it here instead of
|
||||
// CoreTiming
|
||||
block.timestamp = GetTicks();
|
||||
memory_manager->WriteBlock(regs.semaphore_address.SemaphoreAddress(), &block,
|
||||
sizeof(block));
|
||||
} else {
|
||||
const u32 word{memory_manager->Read<u32>(regs.semaphore_address.SemaphoreAddress())};
|
||||
if ((op == GpuSemaphoreOperation::AcquireEqual && word == regs.semaphore_sequence) ||
|
||||
(op == GpuSemaphoreOperation::AcquireGequal &&
|
||||
static_cast<s32>(word - regs.semaphore_sequence) > 0) ||
|
||||
(op == GpuSemaphoreOperation::AcquireMask && (word & regs.semaphore_sequence))) {
|
||||
// Nothing to do in this case
|
||||
} else {
|
||||
regs.acquire_source = true;
|
||||
regs.acquire_value = regs.semaphore_sequence;
|
||||
if (op == GpuSemaphoreOperation::AcquireEqual) {
|
||||
regs.acquire_active = true;
|
||||
regs.acquire_mode = false;
|
||||
} else if (op == GpuSemaphoreOperation::AcquireGequal) {
|
||||
regs.acquire_active = true;
|
||||
regs.acquire_mode = true;
|
||||
} else if (op == GpuSemaphoreOperation::AcquireMask) {
|
||||
// TODO(kemathe) The acquire mask operation waits for a value that, ANDed with
|
||||
// semaphore_sequence, gives a non-0 result
|
||||
LOG_ERROR(HW_GPU, "Invalid semaphore operation AcquireMask not implemented");
|
||||
} else {
|
||||
LOG_ERROR(HW_GPU, "Invalid semaphore operation");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void ProcessSemaphoreRelease() {
|
||||
memory_manager->Write<u32>(regs.semaphore_address.SemaphoreAddress(),
|
||||
regs.semaphore_release);
|
||||
}
|
||||
|
||||
void ProcessSemaphoreAcquire() {
|
||||
const u32 word = memory_manager->Read<u32>(regs.semaphore_address.SemaphoreAddress());
|
||||
const auto value = regs.semaphore_acquire;
|
||||
if (word != value) {
|
||||
regs.acquire_active = true;
|
||||
regs.acquire_value = value;
|
||||
// TODO(kemathe73) figure out how to do the acquire_timeout
|
||||
regs.acquire_mode = false;
|
||||
regs.acquire_source = false;
|
||||
}
|
||||
}
|
||||
|
||||
/// Calls a GPU puller method.
|
||||
void CallPullerMethod(const GPU::MethodCall& method_call) {
|
||||
regs.reg_array[method_call.method] = method_call.argument;
|
||||
const auto method = static_cast<BufferMethods>(method_call.method);
|
||||
|
||||
switch (method) {
|
||||
case BufferMethods::BindObject: {
|
||||
ProcessBindMethod(method_call);
|
||||
break;
|
||||
}
|
||||
case BufferMethods::Nop:
|
||||
case BufferMethods::SemaphoreAddressHigh:
|
||||
case BufferMethods::SemaphoreAddressLow:
|
||||
case BufferMethods::SemaphoreSequence:
|
||||
break;
|
||||
case BufferMethods::UnkCacheFlush:
|
||||
rasterizer->SyncGuestHost();
|
||||
break;
|
||||
case BufferMethods::WrcacheFlush:
|
||||
rasterizer->SignalReference();
|
||||
break;
|
||||
case BufferMethods::FenceValue:
|
||||
break;
|
||||
case BufferMethods::RefCnt:
|
||||
rasterizer->SignalReference();
|
||||
break;
|
||||
case BufferMethods::FenceAction:
|
||||
ProcessFenceActionMethod();
|
||||
break;
|
||||
case BufferMethods::WaitForInterrupt:
|
||||
rasterizer->WaitForIdle();
|
||||
break;
|
||||
case BufferMethods::SemaphoreTrigger: {
|
||||
ProcessSemaphoreTriggerMethod();
|
||||
break;
|
||||
}
|
||||
case BufferMethods::NotifyIntr: {
|
||||
// TODO(Kmather73): Research and implement this method.
|
||||
LOG_ERROR(HW_GPU, "Special puller engine method NotifyIntr not implemented");
|
||||
break;
|
||||
}
|
||||
case BufferMethods::Unk28: {
|
||||
// TODO(Kmather73): Research and implement this method.
|
||||
LOG_ERROR(HW_GPU, "Special puller engine method Unk28 not implemented");
|
||||
break;
|
||||
}
|
||||
case BufferMethods::SemaphoreAcquire: {
|
||||
ProcessSemaphoreAcquire();
|
||||
break;
|
||||
}
|
||||
case BufferMethods::SemaphoreRelease: {
|
||||
ProcessSemaphoreRelease();
|
||||
break;
|
||||
}
|
||||
case BufferMethods::Yield: {
|
||||
// TODO(Kmather73): Research and implement this method.
|
||||
LOG_ERROR(HW_GPU, "Special puller engine method Yield not implemented");
|
||||
break;
|
||||
}
|
||||
default:
|
||||
LOG_ERROR(HW_GPU, "Special puller engine method {:X} not implemented", method);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/// Calls a GPU engine method.
|
||||
void CallEngineMethod(const GPU::MethodCall& method_call) {
|
||||
const EngineID engine = bound_engines[method_call.subchannel];
|
||||
|
||||
switch (engine) {
|
||||
case EngineID::FERMI_TWOD_A:
|
||||
fermi_2d->CallMethod(method_call.method, method_call.argument,
|
||||
method_call.IsLastCall());
|
||||
break;
|
||||
case EngineID::MAXWELL_B:
|
||||
maxwell_3d->CallMethod(method_call.method, method_call.argument,
|
||||
method_call.IsLastCall());
|
||||
break;
|
||||
case EngineID::KEPLER_COMPUTE_B:
|
||||
kepler_compute->CallMethod(method_call.method, method_call.argument,
|
||||
method_call.IsLastCall());
|
||||
break;
|
||||
case EngineID::MAXWELL_DMA_COPY_A:
|
||||
maxwell_dma->CallMethod(method_call.method, method_call.argument,
|
||||
method_call.IsLastCall());
|
||||
break;
|
||||
case EngineID::KEPLER_INLINE_TO_MEMORY_B:
|
||||
kepler_memory->CallMethod(method_call.method, method_call.argument,
|
||||
method_call.IsLastCall());
|
||||
break;
|
||||
default:
|
||||
UNIMPLEMENTED_MSG("Unimplemented engine");
|
||||
}
|
||||
}
|
||||
|
||||
/// Calls a GPU engine multivalue method.
|
||||
void CallEngineMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
|
||||
u32 methods_pending) {
|
||||
const EngineID engine = bound_engines[subchannel];
|
||||
|
||||
switch (engine) {
|
||||
case EngineID::FERMI_TWOD_A:
|
||||
fermi_2d->CallMultiMethod(method, base_start, amount, methods_pending);
|
||||
break;
|
||||
case EngineID::MAXWELL_B:
|
||||
maxwell_3d->CallMultiMethod(method, base_start, amount, methods_pending);
|
||||
break;
|
||||
case EngineID::KEPLER_COMPUTE_B:
|
||||
kepler_compute->CallMultiMethod(method, base_start, amount, methods_pending);
|
||||
break;
|
||||
case EngineID::MAXWELL_DMA_COPY_A:
|
||||
maxwell_dma->CallMultiMethod(method, base_start, amount, methods_pending);
|
||||
break;
|
||||
case EngineID::KEPLER_INLINE_TO_MEMORY_B:
|
||||
kepler_memory->CallMultiMethod(method, base_start, amount, methods_pending);
|
||||
break;
|
||||
default:
|
||||
UNIMPLEMENTED_MSG("Unimplemented engine");
|
||||
}
|
||||
}
|
||||
|
||||
/// Determines where the method should be executed.
|
||||
[[nodiscard]] bool ExecuteMethodOnEngine(u32 method) {
|
||||
const auto buffer_method = static_cast<BufferMethods>(method);
|
||||
return buffer_method >= BufferMethods::NonPullerMethods;
|
||||
}
|
||||
|
||||
struct Regs {
|
||||
static constexpr size_t NUM_REGS = 0x40;
|
||||
|
||||
union {
|
||||
struct {
|
||||
INSERT_PADDING_WORDS_NOINIT(0x4);
|
||||
struct {
|
||||
u32 address_high;
|
||||
u32 address_low;
|
||||
|
||||
[[nodiscard]] GPUVAddr SemaphoreAddress() const {
|
||||
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
|
||||
address_low);
|
||||
}
|
||||
} semaphore_address;
|
||||
|
||||
u32 semaphore_sequence;
|
||||
u32 semaphore_trigger;
|
||||
INSERT_PADDING_WORDS_NOINIT(0xC);
|
||||
|
||||
// The pusher and the puller share the reference counter, the pusher only has read
|
||||
// access
|
||||
u32 reference_count;
|
||||
INSERT_PADDING_WORDS_NOINIT(0x5);
|
||||
|
||||
u32 semaphore_acquire;
|
||||
u32 semaphore_release;
|
||||
u32 fence_value;
|
||||
GPU::FenceAction fence_action;
|
||||
INSERT_PADDING_WORDS_NOINIT(0xE2);
|
||||
|
||||
// Puller state
|
||||
u32 acquire_mode;
|
||||
u32 acquire_source;
|
||||
u32 acquire_active;
|
||||
u32 acquire_timeout;
|
||||
u32 acquire_value;
|
||||
};
|
||||
std::array<u32, NUM_REGS> reg_array;
|
||||
};
|
||||
} regs{};
|
||||
|
||||
GPU& gpu;
|
||||
Core::System& system;
|
||||
std::unique_ptr<Tegra::MemoryManager> memory_manager;
|
||||
std::unique_ptr<Tegra::DmaPusher> dma_pusher;
|
||||
|
||||
std::map<u32, std::unique_ptr<Tegra::CDmaPusher>> cdma_pushers;
|
||||
std::unique_ptr<VideoCore::RendererBase> renderer;
|
||||
VideoCore::RasterizerInterface* rasterizer = nullptr;
|
||||
const bool use_nvdec;
|
||||
|
||||
/// Mapping of command subchannels to their bound engine ids
|
||||
std::array<EngineID, 8> bound_engines{};
|
||||
/// 3D engine
|
||||
std::unique_ptr<Engines::Maxwell3D> maxwell_3d;
|
||||
/// 2D engine
|
||||
std::unique_ptr<Engines::Fermi2D> fermi_2d;
|
||||
/// Compute engine
|
||||
std::unique_ptr<Engines::KeplerCompute> kepler_compute;
|
||||
/// DMA engine
|
||||
std::unique_ptr<Engines::MaxwellDMA> maxwell_dma;
|
||||
/// Inline memory engine
|
||||
std::unique_ptr<Engines::KeplerMemory> kepler_memory;
|
||||
std::shared_ptr<Control::ChannelState> host1x_channel;
|
||||
s32 new_channel_id{1};
|
||||
/// Shader build notifier
|
||||
std::unique_ptr<VideoCore::ShaderNotify> shader_notify;
|
||||
/// When true, we are about to shut down emulation session, so terminate outstanding tasks
|
||||
|
@ -710,33 +420,10 @@ struct GPU::Impl {
|
|||
VideoCommon::GPUThread::ThreadManager gpu_thread;
|
||||
std::unique_ptr<Core::Frontend::GraphicsContext> cpu_context;
|
||||
|
||||
#define ASSERT_REG_POSITION(field_name, position) \
|
||||
static_assert(offsetof(Regs, field_name) == position * 4, \
|
||||
"Field " #field_name " has invalid position")
|
||||
|
||||
ASSERT_REG_POSITION(semaphore_address, 0x4);
|
||||
ASSERT_REG_POSITION(semaphore_sequence, 0x6);
|
||||
ASSERT_REG_POSITION(semaphore_trigger, 0x7);
|
||||
ASSERT_REG_POSITION(reference_count, 0x14);
|
||||
ASSERT_REG_POSITION(semaphore_acquire, 0x1A);
|
||||
ASSERT_REG_POSITION(semaphore_release, 0x1B);
|
||||
ASSERT_REG_POSITION(fence_value, 0x1C);
|
||||
ASSERT_REG_POSITION(fence_action, 0x1D);
|
||||
|
||||
ASSERT_REG_POSITION(acquire_mode, 0x100);
|
||||
ASSERT_REG_POSITION(acquire_source, 0x101);
|
||||
ASSERT_REG_POSITION(acquire_active, 0x102);
|
||||
ASSERT_REG_POSITION(acquire_timeout, 0x103);
|
||||
ASSERT_REG_POSITION(acquire_value, 0x104);
|
||||
|
||||
#undef ASSERT_REG_POSITION
|
||||
|
||||
enum class GpuSemaphoreOperation {
|
||||
AcquireEqual = 0x1,
|
||||
WriteLong = 0x2,
|
||||
AcquireGequal = 0x4,
|
||||
AcquireMask = 0x8,
|
||||
};
|
||||
std::unique_ptr<Tegra::Control::Scheduler> scheduler;
|
||||
std::unordered_map<s32, std::shared_ptr<Tegra::Control::ChannelState>> channels;
|
||||
Tegra::Control::ChannelState* current_channel;
|
||||
s32 bound_channel{-1};
|
||||
};
|
||||
|
||||
GPU::GPU(Core::System& system, bool is_async, bool use_nvdec)
|
||||
|
@ -744,19 +431,26 @@ GPU::GPU(Core::System& system, bool is_async, bool use_nvdec)
|
|||
|
||||
GPU::~GPU() = default;
|
||||
|
||||
std::shared_ptr<Control::ChannelState> GPU::AllocateChannel() {
|
||||
return impl->AllocateChannel();
|
||||
}
|
||||
|
||||
void GPU::InitChannel(Control::ChannelState& to_init) {
|
||||
impl->InitChannel(to_init);
|
||||
}
|
||||
|
||||
void GPU::BindChannel(s32 channel_id) {
|
||||
impl->BindChannel(channel_id);
|
||||
}
|
||||
|
||||
void GPU::ReleaseChannel(Control::ChannelState& to_release) {
|
||||
impl->ReleaseChannel(to_release);
|
||||
}
|
||||
|
||||
void GPU::BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer) {
|
||||
impl->BindRenderer(std::move(renderer));
|
||||
}
|
||||
|
||||
void GPU::CallMethod(const MethodCall& method_call) {
|
||||
impl->CallMethod(method_call);
|
||||
}
|
||||
|
||||
void GPU::CallMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
|
||||
u32 methods_pending) {
|
||||
impl->CallMultiMethod(method, subchannel, base_start, amount, methods_pending);
|
||||
}
|
||||
|
||||
void GPU::FlushCommands() {
|
||||
impl->FlushCommands();
|
||||
}
|
||||
|
@ -881,8 +575,8 @@ void GPU::ReleaseContext() {
|
|||
impl->ReleaseContext();
|
||||
}
|
||||
|
||||
void GPU::PushGPUEntries(Tegra::CommandList&& entries) {
|
||||
impl->PushGPUEntries(std::move(entries));
|
||||
void GPU::PushGPUEntries(s32 channel, Tegra::CommandList&& entries) {
|
||||
impl->PushGPUEntries(channel, std::move(entries));
|
||||
}
|
||||
|
||||
void GPU::PushCommandBuffer(u32 id, Tegra::ChCommandHeaderList& entries) {
|
||||
|
|
|
@ -89,57 +89,20 @@ class Maxwell3D;
|
|||
class KeplerCompute;
|
||||
} // namespace Engines
|
||||
|
||||
enum class EngineID {
|
||||
FERMI_TWOD_A = 0x902D, // 2D Engine
|
||||
MAXWELL_B = 0xB197, // 3D Engine
|
||||
KEPLER_COMPUTE_B = 0xB1C0,
|
||||
KEPLER_INLINE_TO_MEMORY_B = 0xA140,
|
||||
MAXWELL_DMA_COPY_A = 0xB0B5,
|
||||
};
|
||||
namespace Control {
|
||||
struct ChannelState;
|
||||
}
|
||||
|
||||
class MemoryManager;
|
||||
|
||||
class GPU final {
|
||||
public:
|
||||
struct MethodCall {
|
||||
u32 method{};
|
||||
u32 argument{};
|
||||
u32 subchannel{};
|
||||
u32 method_count{};
|
||||
|
||||
explicit MethodCall(u32 method_, u32 argument_, u32 subchannel_ = 0, u32 method_count_ = 0)
|
||||
: method(method_), argument(argument_), subchannel(subchannel_),
|
||||
method_count(method_count_) {}
|
||||
|
||||
[[nodiscard]] bool IsLastCall() const {
|
||||
return method_count <= 1;
|
||||
}
|
||||
};
|
||||
|
||||
enum class FenceOperation : u32 {
|
||||
Acquire = 0,
|
||||
Increment = 1,
|
||||
};
|
||||
|
||||
union FenceAction {
|
||||
u32 raw;
|
||||
BitField<0, 1, FenceOperation> op;
|
||||
BitField<8, 24, u32> syncpoint_id;
|
||||
};
|
||||
|
||||
explicit GPU(Core::System& system, bool is_async, bool use_nvdec);
|
||||
~GPU();
|
||||
|
||||
/// Binds a renderer to the GPU.
|
||||
void BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer);
|
||||
|
||||
/// Calls a GPU method.
|
||||
void CallMethod(const MethodCall& method_call);
|
||||
|
||||
/// Calls a GPU multivalue method.
|
||||
void CallMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
|
||||
u32 methods_pending);
|
||||
|
||||
/// Flush all current written commands into the host GPU for execution.
|
||||
void FlushCommands();
|
||||
/// Synchronizes CPU writes with Host GPU memory.
|
||||
|
@ -147,6 +110,14 @@ public:
|
|||
/// Signal the ending of command list.
|
||||
void OnCommandListEnd();
|
||||
|
||||
std::shared_ptr<Control::ChannelState> AllocateChannel();
|
||||
|
||||
void InitChannel(Control::ChannelState& to_init);
|
||||
|
||||
void BindChannel(s32 channel_id);
|
||||
|
||||
void ReleaseChannel(Control::ChannelState& to_release);
|
||||
|
||||
/// Request a host GPU memory flush from the CPU.
|
||||
[[nodiscard]] u64 RequestFlush(VAddr addr, std::size_t size);
|
||||
|
||||
|
@ -226,7 +197,7 @@ public:
|
|||
void ReleaseContext();
|
||||
|
||||
/// Push GPU command entries to be processed
|
||||
void PushGPUEntries(Tegra::CommandList&& entries);
|
||||
void PushGPUEntries(s32 channel, Tegra::CommandList&& entries);
|
||||
|
||||
/// Push GPU command buffer entries to be processed
|
||||
void PushCommandBuffer(u32 id, Tegra::ChCommandHeaderList& entries);
|
||||
|
@ -248,7 +219,7 @@ public:
|
|||
|
||||
private:
|
||||
struct Impl;
|
||||
std::unique_ptr<Impl> impl;
|
||||
mutable std::unique_ptr<Impl> impl;
|
||||
};
|
||||
|
||||
} // namespace Tegra
|
||||
|
|
|
@ -8,6 +8,7 @@
|
|||
#include "common/thread.h"
|
||||
#include "core/core.h"
|
||||
#include "core/frontend/emu_window.h"
|
||||
#include "video_core/control/scheduler.h"
|
||||
#include "video_core/dma_pusher.h"
|
||||
#include "video_core/gpu.h"
|
||||
#include "video_core/gpu_thread.h"
|
||||
|
@ -18,7 +19,7 @@ namespace VideoCommon::GPUThread {
|
|||
/// Runs the GPU thread
|
||||
static void RunThread(std::stop_token stop_token, Core::System& system,
|
||||
VideoCore::RendererBase& renderer, Core::Frontend::GraphicsContext& context,
|
||||
Tegra::DmaPusher& dma_pusher, SynchState& state) {
|
||||
Tegra::Control::Scheduler& scheduler, SynchState& state) {
|
||||
std::string name = "GPU";
|
||||
MicroProfileOnThreadCreate(name.c_str());
|
||||
SCOPE_EXIT({ MicroProfileOnThreadExit(); });
|
||||
|
@ -36,8 +37,7 @@ static void RunThread(std::stop_token stop_token, Core::System& system,
|
|||
break;
|
||||
}
|
||||
if (auto* submit_list = std::get_if<SubmitListCommand>(&next.data)) {
|
||||
dma_pusher.Push(std::move(submit_list->entries));
|
||||
dma_pusher.DispatchCalls();
|
||||
scheduler.Push(submit_list->channel, std::move(submit_list->entries));
|
||||
} else if (const auto* data = std::get_if<SwapBuffersCommand>(&next.data)) {
|
||||
renderer.SwapBuffers(data->framebuffer ? &*data->framebuffer : nullptr);
|
||||
} else if (std::holds_alternative<OnCommandListEndCommand>(next.data)) {
|
||||
|
@ -68,14 +68,14 @@ ThreadManager::~ThreadManager() = default;
|
|||
|
||||
void ThreadManager::StartThread(VideoCore::RendererBase& renderer,
|
||||
Core::Frontend::GraphicsContext& context,
|
||||
Tegra::DmaPusher& dma_pusher) {
|
||||
Tegra::Control::Scheduler& scheduler) {
|
||||
rasterizer = renderer.ReadRasterizer();
|
||||
thread = std::jthread(RunThread, std::ref(system), std::ref(renderer), std::ref(context),
|
||||
std::ref(dma_pusher), std::ref(state));
|
||||
std::ref(scheduler), std::ref(state));
|
||||
}
|
||||
|
||||
void ThreadManager::SubmitList(Tegra::CommandList&& entries) {
|
||||
PushCommand(SubmitListCommand(std::move(entries)));
|
||||
void ThreadManager::SubmitList(s32 channel, Tegra::CommandList&& entries) {
|
||||
PushCommand(SubmitListCommand(channel, std::move(entries)));
|
||||
}
|
||||
|
||||
void ThreadManager::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
|
||||
|
|
|
@ -15,7 +15,9 @@
|
|||
|
||||
namespace Tegra {
|
||||
struct FramebufferConfig;
|
||||
class DmaPusher;
|
||||
namespace Control {
|
||||
class Scheduler;
|
||||
}
|
||||
} // namespace Tegra
|
||||
|
||||
namespace Core {
|
||||
|
@ -34,8 +36,10 @@ namespace VideoCommon::GPUThread {
|
|||
|
||||
/// Command to signal to the GPU thread that a command list is ready for processing
|
||||
struct SubmitListCommand final {
|
||||
explicit SubmitListCommand(Tegra::CommandList&& entries_) : entries{std::move(entries_)} {}
|
||||
explicit SubmitListCommand(s32 channel_, Tegra::CommandList&& entries_)
|
||||
: channel{channel_}, entries{std::move(entries_)} {}
|
||||
|
||||
s32 channel;
|
||||
Tegra::CommandList entries;
|
||||
};
|
||||
|
||||
|
@ -112,10 +116,10 @@ public:
|
|||
|
||||
/// Creates and starts the GPU thread.
|
||||
void StartThread(VideoCore::RendererBase& renderer, Core::Frontend::GraphicsContext& context,
|
||||
Tegra::DmaPusher& dma_pusher);
|
||||
Tegra::Control::Scheduler& scheduler);
|
||||
|
||||
/// Push GPU command entries to be processed
|
||||
void SubmitList(Tegra::CommandList&& entries);
|
||||
void SubmitList(s32 channel, Tegra::CommandList&& entries);
|
||||
|
||||
/// Swap buffers (render frame)
|
||||
void SwapBuffers(const Tegra::FramebufferConfig* framebuffer);
|
||||
|
|
|
@ -133,11 +133,6 @@ void MemoryManager::SetPageEntry(GPUVAddr gpu_addr, PageEntry page_entry, std::s
|
|||
// TryLockPage(page_entry, size);
|
||||
auto& current_page = page_table[PageEntryIndex(gpu_addr)];
|
||||
|
||||
if ((!current_page.IsValid() && page_entry.IsValid()) ||
|
||||
current_page.ToAddress() != page_entry.ToAddress()) {
|
||||
rasterizer->ModifyGPUMemory(gpu_addr, size);
|
||||
}
|
||||
|
||||
current_page = page_entry;
|
||||
}
|
||||
|
||||
|
|
|
@ -17,6 +17,7 @@
|
|||
|
||||
#include "common/assert.h"
|
||||
#include "common/settings.h"
|
||||
#include "video_core/control/channel_state_cache.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/memory_manager.h"
|
||||
#include "video_core/rasterizer_interface.h"
|
||||
|
@ -90,13 +91,10 @@ private:
|
|||
};
|
||||
|
||||
template <class QueryCache, class CachedQuery, class CounterStream, class HostCounter>
|
||||
class QueryCacheBase {
|
||||
class QueryCacheBase : public VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> {
|
||||
public:
|
||||
explicit QueryCacheBase(VideoCore::RasterizerInterface& rasterizer_,
|
||||
Tegra::Engines::Maxwell3D& maxwell3d_,
|
||||
Tegra::MemoryManager& gpu_memory_)
|
||||
: rasterizer{rasterizer_}, maxwell3d{maxwell3d_},
|
||||
gpu_memory{gpu_memory_}, streams{{CounterStream{static_cast<QueryCache&>(*this),
|
||||
explicit QueryCacheBase(VideoCore::RasterizerInterface& rasterizer_)
|
||||
: rasterizer{rasterizer_}, streams{{CounterStream{static_cast<QueryCache&>(*this),
|
||||
VideoCore::QueryType::SamplesPassed}}} {}
|
||||
|
||||
void InvalidateRegion(VAddr addr, std::size_t size) {
|
||||
|
@ -117,13 +115,13 @@ public:
|
|||
*/
|
||||
void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) {
|
||||
std::unique_lock lock{mutex};
|
||||
const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
|
||||
const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
|
||||
ASSERT(cpu_addr);
|
||||
|
||||
CachedQuery* query = TryGet(*cpu_addr);
|
||||
if (!query) {
|
||||
ASSERT_OR_EXECUTE(cpu_addr, return;);
|
||||
u8* const host_ptr = gpu_memory.GetPointer(gpu_addr);
|
||||
u8* const host_ptr = gpu_memory->GetPointer(gpu_addr);
|
||||
|
||||
query = Register(type, *cpu_addr, host_ptr, timestamp.has_value());
|
||||
}
|
||||
|
@ -137,7 +135,7 @@ public:
|
|||
/// Updates counters from GPU state. Expected to be called once per draw, clear or dispatch.
|
||||
void UpdateCounters() {
|
||||
std::unique_lock lock{mutex};
|
||||
const auto& regs = maxwell3d.regs;
|
||||
const auto& regs = maxwell3d->regs;
|
||||
Stream(VideoCore::QueryType::SamplesPassed).Update(regs.samplecnt_enable);
|
||||
}
|
||||
|
||||
|
@ -264,8 +262,6 @@ private:
|
|||
static constexpr unsigned YUZU_PAGEBITS = 12;
|
||||
|
||||
VideoCore::RasterizerInterface& rasterizer;
|
||||
Tegra::Engines::Maxwell3D& maxwell3d;
|
||||
Tegra::MemoryManager& gpu_memory;
|
||||
|
||||
std::recursive_mutex mutex;
|
||||
|
||||
|
|
|
@ -16,6 +16,9 @@ class MemoryManager;
|
|||
namespace Engines {
|
||||
class AccelerateDMAInterface;
|
||||
}
|
||||
namespace Control {
|
||||
struct ChannelState;
|
||||
}
|
||||
} // namespace Tegra
|
||||
|
||||
namespace VideoCore {
|
||||
|
@ -137,5 +140,11 @@ public:
|
|||
/// Initialize disk cached resources for the game being emulated
|
||||
virtual void LoadDiskResources(u64 title_id, std::stop_token stop_loading,
|
||||
const DiskResourceLoadCallback& callback) {}
|
||||
|
||||
virtual void InitializeChannel(Tegra::Control::ChannelState& channel) {}
|
||||
|
||||
virtual void BindChannel(Tegra::Control::ChannelState& channel) {}
|
||||
|
||||
virtual void ReleaseChannel(s32 channel_id) {}
|
||||
};
|
||||
} // namespace VideoCore
|
||||
|
|
|
@ -12,7 +12,7 @@ namespace OpenGL {
|
|||
|
||||
GLInnerFence::GLInnerFence(u32 payload_, bool is_stubbed_) : FenceBase{payload_, is_stubbed_} {}
|
||||
|
||||
GLInnerFence::GLInnerFence(GPUVAddr address_, u32 payload_, bool is_stubbed_)
|
||||
GLInnerFence::GLInnerFence(u8* address_, u32 payload_, bool is_stubbed_)
|
||||
: FenceBase{address_, payload_, is_stubbed_} {}
|
||||
|
||||
GLInnerFence::~GLInnerFence() = default;
|
||||
|
@ -52,7 +52,7 @@ Fence FenceManagerOpenGL::CreateFence(u32 value, bool is_stubbed) {
|
|||
return std::make_shared<GLInnerFence>(value, is_stubbed);
|
||||
}
|
||||
|
||||
Fence FenceManagerOpenGL::CreateFence(GPUVAddr addr, u32 value, bool is_stubbed) {
|
||||
Fence FenceManagerOpenGL::CreateFence(u8* addr, u32 value, bool is_stubbed) {
|
||||
return std::make_shared<GLInnerFence>(addr, value, is_stubbed);
|
||||
}
|
||||
|
||||
|
|
|
@ -17,7 +17,7 @@ namespace OpenGL {
|
|||
class GLInnerFence : public VideoCommon::FenceBase {
|
||||
public:
|
||||
explicit GLInnerFence(u32 payload_, bool is_stubbed_);
|
||||
explicit GLInnerFence(GPUVAddr address_, u32 payload_, bool is_stubbed_);
|
||||
explicit GLInnerFence(u8* address_, u32 payload_, bool is_stubbed_);
|
||||
~GLInnerFence();
|
||||
|
||||
void Queue();
|
||||
|
@ -41,7 +41,7 @@ public:
|
|||
|
||||
protected:
|
||||
Fence CreateFence(u32 value, bool is_stubbed) override;
|
||||
Fence CreateFence(GPUVAddr addr, u32 value, bool is_stubbed) override;
|
||||
Fence CreateFence(u8* addr, u32 value, bool is_stubbed) override;
|
||||
void QueueFence(Fence& fence) override;
|
||||
bool IsFenceSignaled(Fence& fence) const override;
|
||||
void WaitFence(Fence& fence) override;
|
||||
|
|
|
@ -26,9 +26,8 @@ constexpr GLenum GetTarget(VideoCore::QueryType type) {
|
|||
|
||||
} // Anonymous namespace
|
||||
|
||||
QueryCache::QueryCache(RasterizerOpenGL& rasterizer_, Tegra::Engines::Maxwell3D& maxwell3d_,
|
||||
Tegra::MemoryManager& gpu_memory_)
|
||||
: QueryCacheBase(rasterizer_, maxwell3d_, gpu_memory_), gl_rasterizer{rasterizer_} {}
|
||||
QueryCache::QueryCache(RasterizerOpenGL& rasterizer_)
|
||||
: QueryCacheBase(rasterizer_), gl_rasterizer{rasterizer_} {}
|
||||
|
||||
QueryCache::~QueryCache() = default;
|
||||
|
||||
|
|
|
@ -28,8 +28,7 @@ using CounterStream = VideoCommon::CounterStreamBase<QueryCache, HostCounter>;
|
|||
class QueryCache final
|
||||
: public VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream, HostCounter> {
|
||||
public:
|
||||
explicit QueryCache(RasterizerOpenGL& rasterizer_, Tegra::Engines::Maxwell3D& maxwell3d_,
|
||||
Tegra::MemoryManager& gpu_memory_);
|
||||
explicit QueryCache(RasterizerOpenGL& rasterizer_);
|
||||
~QueryCache();
|
||||
|
||||
OGLQuery AllocateQuery(VideoCore::QueryType type);
|
||||
|
|
|
@ -60,12 +60,11 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra
|
|||
kepler_compute(gpu.KeplerCompute()), gpu_memory(gpu.MemoryManager()), device(device_),
|
||||
screen_info(screen_info_), program_manager(program_manager_), state_tracker(state_tracker_),
|
||||
texture_cache_runtime(device, program_manager, state_tracker),
|
||||
texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory),
|
||||
buffer_cache_runtime(device),
|
||||
buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime),
|
||||
shader_cache(*this, emu_window_, maxwell3d, kepler_compute, gpu_memory, device, texture_cache,
|
||||
buffer_cache, program_manager, state_tracker, gpu.ShaderNotify()),
|
||||
query_cache(*this, maxwell3d, gpu_memory), accelerate_dma(buffer_cache),
|
||||
texture_cache(texture_cache_runtime, *this), buffer_cache_runtime(device),
|
||||
buffer_cache(*this, cpu_memory_, buffer_cache_runtime),
|
||||
shader_cache(*this, emu_window_, device, texture_cache, buffer_cache, program_manager,
|
||||
state_tracker, gpu.ShaderNotify()),
|
||||
query_cache(*this), accelerate_dma(buffer_cache),
|
||||
fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache) {}
|
||||
|
||||
RasterizerOpenGL::~RasterizerOpenGL() = default;
|
||||
|
@ -392,7 +391,8 @@ void RasterizerOpenGL::SignalSemaphore(GPUVAddr addr, u32 value) {
|
|||
gpu_memory.Write<u32>(addr, value);
|
||||
return;
|
||||
}
|
||||
fence_manager.SignalSemaphore(addr, value);
|
||||
auto paddr = gpu_memory.GetPointer(addr);
|
||||
fence_manager.SignalSemaphore(paddr, value);
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SignalSyncPoint(u32 value) {
|
||||
|
|
|
@ -151,16 +151,13 @@ void SetXfbState(VideoCommon::TransformFeedbackState& state, const Maxwell& regs
|
|||
} // Anonymous namespace
|
||||
|
||||
ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_,
|
||||
Tegra::Engines::Maxwell3D& maxwell3d_,
|
||||
Tegra::Engines::KeplerCompute& kepler_compute_,
|
||||
Tegra::MemoryManager& gpu_memory_, const Device& device_,
|
||||
TextureCache& texture_cache_, BufferCache& buffer_cache_,
|
||||
ProgramManager& program_manager_, StateTracker& state_tracker_,
|
||||
VideoCore::ShaderNotify& shader_notify_)
|
||||
: VideoCommon::ShaderCache{rasterizer_, gpu_memory_, maxwell3d_, kepler_compute_},
|
||||
emu_window{emu_window_}, device{device_}, texture_cache{texture_cache_},
|
||||
buffer_cache{buffer_cache_}, program_manager{program_manager_}, state_tracker{state_tracker_},
|
||||
shader_notify{shader_notify_}, use_asynchronous_shaders{device.UseAsynchronousShaders()},
|
||||
const Device& device_, TextureCache& texture_cache_,
|
||||
BufferCache& buffer_cache_, ProgramManager& program_manager_,
|
||||
StateTracker& state_tracker_, VideoCore::ShaderNotify& shader_notify_)
|
||||
: VideoCommon::ShaderCache{rasterizer_}, emu_window{emu_window_}, device{device_},
|
||||
texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, program_manager{program_manager_},
|
||||
state_tracker{state_tracker_}, shader_notify{shader_notify_},
|
||||
use_asynchronous_shaders{device.UseAsynchronousShaders()},
|
||||
profile{
|
||||
.supported_spirv = 0x00010000,
|
||||
|
||||
|
@ -310,7 +307,7 @@ GraphicsPipeline* ShaderCache::CurrentGraphicsPipeline() {
|
|||
current_pipeline = nullptr;
|
||||
return nullptr;
|
||||
}
|
||||
const auto& regs{maxwell3d.regs};
|
||||
const auto& regs{maxwell3d->regs};
|
||||
graphics_key.raw = 0;
|
||||
graphics_key.early_z.Assign(regs.force_early_fragment_tests != 0 ? 1 : 0);
|
||||
graphics_key.gs_input_topology.Assign(graphics_key.unique_hashes[4] != 0
|
||||
|
@ -351,13 +348,13 @@ GraphicsPipeline* ShaderCache::BuiltPipeline(GraphicsPipeline* pipeline) const n
|
|||
}
|
||||
// If something is using depth, we can assume that games are not rendering anything which
|
||||
// will be used one time.
|
||||
if (maxwell3d.regs.zeta_enable) {
|
||||
if (maxwell3d->regs.zeta_enable) {
|
||||
return nullptr;
|
||||
}
|
||||
// If games are using a small index count, we can assume these are full screen quads.
|
||||
// Usually these shaders are only used once for building textures so we can assume they
|
||||
// can't be built async
|
||||
if (maxwell3d.regs.index_array.count <= 6 || maxwell3d.regs.vertex_buffer.count <= 6) {
|
||||
if (maxwell3d->regs.index_array.count <= 6 || maxwell3d->regs.vertex_buffer.count <= 6) {
|
||||
return pipeline;
|
||||
}
|
||||
return nullptr;
|
||||
|
@ -368,7 +365,7 @@ ComputePipeline* ShaderCache::CurrentComputePipeline() {
|
|||
if (!shader) {
|
||||
return nullptr;
|
||||
}
|
||||
const auto& qmd{kepler_compute.launch_description};
|
||||
const auto& qmd{kepler_compute->launch_description};
|
||||
const ComputePipelineKey key{
|
||||
.unique_hash = shader->unique_hash,
|
||||
.shared_memory_size = qmd.shared_alloc,
|
||||
|
@ -481,8 +478,8 @@ std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline(
|
|||
}
|
||||
auto* const thread_worker{build_in_parallel ? workers.get() : nullptr};
|
||||
return std::make_unique<GraphicsPipeline>(
|
||||
device, texture_cache, buffer_cache, gpu_memory, maxwell3d, program_manager, state_tracker,
|
||||
thread_worker, &shader_notify, sources, sources_spirv, infos, key);
|
||||
device, texture_cache, buffer_cache, *gpu_memory, *maxwell3d, program_manager,
|
||||
state_tracker, thread_worker, &shader_notify, sources, sources_spirv, infos, key);
|
||||
|
||||
} catch (Shader::Exception& exception) {
|
||||
LOG_ERROR(Render_OpenGL, "{}", exception.what());
|
||||
|
@ -491,9 +488,9 @@ std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline(
|
|||
|
||||
std::unique_ptr<ComputePipeline> ShaderCache::CreateComputePipeline(
|
||||
const ComputePipelineKey& key, const VideoCommon::ShaderInfo* shader) {
|
||||
const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()};
|
||||
const auto& qmd{kepler_compute.launch_description};
|
||||
ComputeEnvironment env{kepler_compute, gpu_memory, program_base, qmd.program_start};
|
||||
const GPUVAddr program_base{kepler_compute->regs.code_loc.Address()};
|
||||
const auto& qmd{kepler_compute->launch_description};
|
||||
ComputeEnvironment env{*kepler_compute, *gpu_memory, program_base, qmd.program_start};
|
||||
env.SetCachedSize(shader->size_bytes);
|
||||
|
||||
main_pools.ReleaseContents();
|
||||
|
@ -536,8 +533,8 @@ std::unique_ptr<ComputePipeline> ShaderCache::CreateComputePipeline(
|
|||
break;
|
||||
}
|
||||
|
||||
return std::make_unique<ComputePipeline>(device, texture_cache, buffer_cache, gpu_memory,
|
||||
kepler_compute, program_manager, program.info, code,
|
||||
return std::make_unique<ComputePipeline>(device, texture_cache, buffer_cache, *gpu_memory,
|
||||
*kepler_compute, program_manager, program.info, code,
|
||||
code_spirv);
|
||||
} catch (Shader::Exception& exception) {
|
||||
LOG_ERROR(Render_OpenGL, "{}", exception.what());
|
||||
|
|
|
@ -30,12 +30,9 @@ using ShaderWorker = Common::StatefulThreadWorker<ShaderContext::Context>;
|
|||
class ShaderCache : public VideoCommon::ShaderCache {
|
||||
public:
|
||||
explicit ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_,
|
||||
Tegra::Engines::Maxwell3D& maxwell3d_,
|
||||
Tegra::Engines::KeplerCompute& kepler_compute_,
|
||||
Tegra::MemoryManager& gpu_memory_, const Device& device_,
|
||||
TextureCache& texture_cache_, BufferCache& buffer_cache_,
|
||||
ProgramManager& program_manager_, StateTracker& state_tracker_,
|
||||
VideoCore::ShaderNotify& shader_notify_);
|
||||
const Device& device_, TextureCache& texture_cache_,
|
||||
BufferCache& buffer_cache_, ProgramManager& program_manager_,
|
||||
StateTracker& state_tracker_, VideoCore::ShaderNotify& shader_notify_);
|
||||
~ShaderCache();
|
||||
|
||||
void LoadDiskResources(u64 title_id, std::stop_token stop_loading,
|
||||
|
|
|
@ -95,20 +95,25 @@ RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_,
|
|||
Core::Frontend::EmuWindow& emu_window,
|
||||
Core::Memory::Memory& cpu_memory_, Tegra::GPU& gpu_,
|
||||
std::unique_ptr<Core::Frontend::GraphicsContext> context_) try
|
||||
: RendererBase(emu_window, std::move(context_)), telemetry_session(telemetry_session_),
|
||||
cpu_memory(cpu_memory_), gpu(gpu_), library(OpenLibrary()),
|
||||
: RendererBase(emu_window, std::move(context_)),
|
||||
telemetry_session(telemetry_session_),
|
||||
cpu_memory(cpu_memory_),
|
||||
gpu(gpu_),
|
||||
library(OpenLibrary()),
|
||||
instance(CreateInstance(library, dld, VK_API_VERSION_1_1, render_window.GetWindowInfo().type,
|
||||
true, Settings::values.renderer_debug.GetValue())),
|
||||
debug_callback(Settings::values.renderer_debug ? CreateDebugCallback(instance) : nullptr),
|
||||
surface(CreateSurface(instance, render_window)),
|
||||
device(CreateDevice(instance, dld, *surface)), memory_allocator(device, false),
|
||||
state_tracker(gpu), scheduler(device, state_tracker),
|
||||
device(CreateDevice(instance, dld, *surface)),
|
||||
memory_allocator(device, false),
|
||||
state_tracker(gpu),
|
||||
scheduler(device, state_tracker),
|
||||
swapchain(*surface, device, scheduler, render_window.GetFramebufferLayout().width,
|
||||
render_window.GetFramebufferLayout().height, false),
|
||||
blit_screen(cpu_memory, render_window, device, memory_allocator, swapchain, scheduler,
|
||||
screen_info),
|
||||
rasterizer(render_window, gpu, gpu.MemoryManager(), cpu_memory, screen_info, device,
|
||||
memory_allocator, state_tracker, scheduler) {
|
||||
rasterizer(render_window, gpu, cpu_memory, screen_info, device, memory_allocator,
|
||||
state_tracker, scheduler) {
|
||||
Report();
|
||||
} catch (const vk::Exception& exception) {
|
||||
LOG_ERROR(Render_Vulkan, "Vulkan initialization failed with error: {}", exception.what());
|
||||
|
|
|
@ -14,7 +14,7 @@ namespace Vulkan {
|
|||
InnerFence::InnerFence(Scheduler& scheduler_, u32 payload_, bool is_stubbed_)
|
||||
: FenceBase{payload_, is_stubbed_}, scheduler{scheduler_} {}
|
||||
|
||||
InnerFence::InnerFence(Scheduler& scheduler_, GPUVAddr address_, u32 payload_, bool is_stubbed_)
|
||||
InnerFence::InnerFence(Scheduler& scheduler_, u8* address_, u32 payload_, bool is_stubbed_)
|
||||
: FenceBase{address_, payload_, is_stubbed_}, scheduler{scheduler_} {}
|
||||
|
||||
InnerFence::~InnerFence() = default;
|
||||
|
@ -52,7 +52,7 @@ Fence FenceManager::CreateFence(u32 value, bool is_stubbed) {
|
|||
return std::make_shared<InnerFence>(scheduler, value, is_stubbed);
|
||||
}
|
||||
|
||||
Fence FenceManager::CreateFence(GPUVAddr addr, u32 value, bool is_stubbed) {
|
||||
Fence FenceManager::CreateFence(u8* addr, u32 value, bool is_stubbed) {
|
||||
return std::make_shared<InnerFence>(scheduler, addr, value, is_stubbed);
|
||||
}
|
||||
|
||||
|
|
|
@ -26,7 +26,7 @@ class Scheduler;
|
|||
class InnerFence : public VideoCommon::FenceBase {
|
||||
public:
|
||||
explicit InnerFence(Scheduler& scheduler_, u32 payload_, bool is_stubbed_);
|
||||
explicit InnerFence(Scheduler& scheduler_, GPUVAddr address_, u32 payload_, bool is_stubbed_);
|
||||
explicit InnerFence(Scheduler& scheduler_, u8* address_, u32 payload_, bool is_stubbed_);
|
||||
~InnerFence();
|
||||
|
||||
void Queue();
|
||||
|
@ -51,7 +51,7 @@ public:
|
|||
|
||||
protected:
|
||||
Fence CreateFence(u32 value, bool is_stubbed) override;
|
||||
Fence CreateFence(GPUVAddr addr, u32 value, bool is_stubbed) override;
|
||||
Fence CreateFence(u8* addr, u32 value, bool is_stubbed) override;
|
||||
void QueueFence(Fence& fence) override;
|
||||
bool IsFenceSignaled(Fence& fence) const override;
|
||||
void WaitFence(Fence& fence) override;
|
||||
|
|
|
@ -259,17 +259,15 @@ bool GraphicsPipelineCacheKey::operator==(const GraphicsPipelineCacheKey& rhs) c
|
|||
return std::memcmp(&rhs, this, Size()) == 0;
|
||||
}
|
||||
|
||||
PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxwell3D& maxwell3d_,
|
||||
Tegra::Engines::KeplerCompute& kepler_compute_,
|
||||
Tegra::MemoryManager& gpu_memory_, const Device& device_,
|
||||
PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device_,
|
||||
Scheduler& scheduler_, DescriptorPool& descriptor_pool_,
|
||||
UpdateDescriptorQueue& update_descriptor_queue_,
|
||||
RenderPassCache& render_pass_cache_, BufferCache& buffer_cache_,
|
||||
TextureCache& texture_cache_, VideoCore::ShaderNotify& shader_notify_)
|
||||
: VideoCommon::ShaderCache{rasterizer_, gpu_memory_, maxwell3d_, kepler_compute_},
|
||||
device{device_}, scheduler{scheduler_}, descriptor_pool{descriptor_pool_},
|
||||
update_descriptor_queue{update_descriptor_queue_}, render_pass_cache{render_pass_cache_},
|
||||
buffer_cache{buffer_cache_}, texture_cache{texture_cache_}, shader_notify{shader_notify_},
|
||||
: VideoCommon::ShaderCache{rasterizer_}, device{device_}, scheduler{scheduler_},
|
||||
descriptor_pool{descriptor_pool_}, update_descriptor_queue{update_descriptor_queue_},
|
||||
render_pass_cache{render_pass_cache_}, buffer_cache{buffer_cache_},
|
||||
texture_cache{texture_cache_}, shader_notify{shader_notify_},
|
||||
use_asynchronous_shaders{Settings::values.use_asynchronous_shaders.GetValue()},
|
||||
workers(std::max(std::thread::hardware_concurrency(), 2U) - 1, "VkPipelineBuilder"),
|
||||
serialization_thread(1, "VkPipelineSerialization") {
|
||||
|
@ -337,7 +335,7 @@ GraphicsPipeline* PipelineCache::CurrentGraphicsPipeline() {
|
|||
current_pipeline = nullptr;
|
||||
return nullptr;
|
||||
}
|
||||
graphics_key.state.Refresh(maxwell3d, device.IsExtExtendedDynamicStateSupported(),
|
||||
graphics_key.state.Refresh(*maxwell3d, device.IsExtExtendedDynamicStateSupported(),
|
||||
device.IsExtVertexInputDynamicStateSupported());
|
||||
|
||||
if (current_pipeline) {
|
||||
|
@ -357,7 +355,7 @@ ComputePipeline* PipelineCache::CurrentComputePipeline() {
|
|||
if (!shader) {
|
||||
return nullptr;
|
||||
}
|
||||
const auto& qmd{kepler_compute.launch_description};
|
||||
const auto& qmd{kepler_compute->launch_description};
|
||||
const ComputePipelineCacheKey key{
|
||||
.unique_hash = shader->unique_hash,
|
||||
.shared_memory_size = qmd.shared_alloc,
|
||||
|
@ -486,13 +484,13 @@ GraphicsPipeline* PipelineCache::BuiltPipeline(GraphicsPipeline* pipeline) const
|
|||
}
|
||||
// If something is using depth, we can assume that games are not rendering anything which
|
||||
// will be used one time.
|
||||
if (maxwell3d.regs.zeta_enable) {
|
||||
if (maxwell3d->regs.zeta_enable) {
|
||||
return nullptr;
|
||||
}
|
||||
// If games are using a small index count, we can assume these are full screen quads.
|
||||
// Usually these shaders are only used once for building textures so we can assume they
|
||||
// can't be built async
|
||||
if (maxwell3d.regs.index_array.count <= 6 || maxwell3d.regs.vertex_buffer.count <= 6) {
|
||||
if (maxwell3d->regs.index_array.count <= 6 || maxwell3d->regs.vertex_buffer.count <= 6) {
|
||||
return pipeline;
|
||||
}
|
||||
return nullptr;
|
||||
|
@ -558,7 +556,7 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline(
|
|||
}
|
||||
Common::ThreadWorker* const thread_worker{build_in_parallel ? &workers : nullptr};
|
||||
return std::make_unique<GraphicsPipeline>(
|
||||
maxwell3d, gpu_memory, scheduler, buffer_cache, texture_cache, &shader_notify, device,
|
||||
*maxwell3d, *gpu_memory, scheduler, buffer_cache, texture_cache, &shader_notify, device,
|
||||
descriptor_pool, update_descriptor_queue, thread_worker, statistics, render_pass_cache, key,
|
||||
std::move(modules), infos);
|
||||
|
||||
|
@ -592,9 +590,9 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline() {
|
|||
|
||||
std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline(
|
||||
const ComputePipelineCacheKey& key, const ShaderInfo* shader) {
|
||||
const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()};
|
||||
const auto& qmd{kepler_compute.launch_description};
|
||||
ComputeEnvironment env{kepler_compute, gpu_memory, program_base, qmd.program_start};
|
||||
const GPUVAddr program_base{kepler_compute->regs.code_loc.Address()};
|
||||
const auto& qmd{kepler_compute->launch_description};
|
||||
ComputeEnvironment env{*kepler_compute, *gpu_memory, program_base, qmd.program_start};
|
||||
env.SetCachedSize(shader->size_bytes);
|
||||
|
||||
main_pools.ReleaseContents();
|
||||
|
|
|
@ -100,10 +100,8 @@ struct ShaderPools {
|
|||
|
||||
class PipelineCache : public VideoCommon::ShaderCache {
|
||||
public:
|
||||
explicit PipelineCache(RasterizerVulkan& rasterizer, Tegra::Engines::Maxwell3D& maxwell3d,
|
||||
Tegra::Engines::KeplerCompute& kepler_compute,
|
||||
Tegra::MemoryManager& gpu_memory, const Device& device,
|
||||
Scheduler& scheduler, DescriptorPool& descriptor_pool,
|
||||
explicit PipelineCache(RasterizerVulkan& rasterizer, const Device& device, Scheduler& scheduler,
|
||||
DescriptorPool& descriptor_pool,
|
||||
UpdateDescriptorQueue& update_descriptor_queue,
|
||||
RenderPassCache& render_pass_cache, BufferCache& buffer_cache,
|
||||
TextureCache& texture_cache, VideoCore::ShaderNotify& shader_notify_);
|
||||
|
|
|
@ -65,10 +65,9 @@ void QueryPool::Reserve(std::pair<VkQueryPool, u32> query) {
|
|||
usage[pool_index * GROW_STEP + static_cast<std::ptrdiff_t>(query.second)] = false;
|
||||
}
|
||||
|
||||
QueryCache::QueryCache(VideoCore::RasterizerInterface& rasterizer_,
|
||||
Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::MemoryManager& gpu_memory_,
|
||||
const Device& device_, Scheduler& scheduler_)
|
||||
: QueryCacheBase{rasterizer_, maxwell3d_, gpu_memory_}, device{device_}, scheduler{scheduler_},
|
||||
QueryCache::QueryCache(VideoCore::RasterizerInterface& rasterizer_, const Device& device_,
|
||||
Scheduler& scheduler_)
|
||||
: QueryCacheBase{rasterizer_}, device{device_}, scheduler{scheduler_},
|
||||
query_pools{
|
||||
QueryPool{device_, scheduler_, QueryType::SamplesPassed},
|
||||
} {}
|
||||
|
|
|
@ -52,9 +52,8 @@ private:
|
|||
class QueryCache final
|
||||
: public VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream, HostCounter> {
|
||||
public:
|
||||
explicit QueryCache(VideoCore::RasterizerInterface& rasterizer_,
|
||||
Tegra::Engines::Maxwell3D& maxwell3d_, Tegra::MemoryManager& gpu_memory_,
|
||||
const Device& device_, Scheduler& scheduler_);
|
||||
explicit QueryCache(VideoCore::RasterizerInterface& rasterizer_, const Device& device_,
|
||||
Scheduler& scheduler_);
|
||||
~QueryCache();
|
||||
|
||||
std::pair<VkQueryPool, u32> AllocateQuery(VideoCore::QueryType type);
|
||||
|
|
|
@ -11,6 +11,7 @@
|
|||
#include "common/microprofile.h"
|
||||
#include "common/scope_exit.h"
|
||||
#include "common/settings.h"
|
||||
#include "video_core/control/channel_state.h"
|
||||
#include "video_core/engines/kepler_compute.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/renderer_vulkan/blit_image.h"
|
||||
|
@ -148,14 +149,11 @@ DrawParams MakeDrawParams(const Maxwell& regs, u32 num_instances, bool is_instan
|
|||
} // Anonymous namespace
|
||||
|
||||
RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
|
||||
Tegra::MemoryManager& gpu_memory_,
|
||||
Core::Memory::Memory& cpu_memory_, ScreenInfo& screen_info_,
|
||||
const Device& device_, MemoryAllocator& memory_allocator_,
|
||||
StateTracker& state_tracker_, Scheduler& scheduler_)
|
||||
: RasterizerAccelerated{cpu_memory_}, gpu{gpu_},
|
||||
gpu_memory{gpu_memory_}, maxwell3d{gpu.Maxwell3D()}, kepler_compute{gpu.KeplerCompute()},
|
||||
screen_info{screen_info_}, device{device_}, memory_allocator{memory_allocator_},
|
||||
state_tracker{state_tracker_}, scheduler{scheduler_},
|
||||
: RasterizerAccelerated{cpu_memory_}, gpu{gpu_}, screen_info{screen_info_}, device{device_},
|
||||
memory_allocator{memory_allocator_}, state_tracker{state_tracker_}, scheduler{scheduler_},
|
||||
staging_pool(device, memory_allocator, scheduler), descriptor_pool(device, scheduler),
|
||||
update_descriptor_queue(device, scheduler),
|
||||
blit_image(device, scheduler, state_tracker, descriptor_pool),
|
||||
|
@ -165,14 +163,13 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra
|
|||
memory_allocator, staging_pool,
|
||||
blit_image, astc_decoder_pass,
|
||||
render_pass_cache},
|
||||
texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory),
|
||||
texture_cache(texture_cache_runtime, *this),
|
||||
buffer_cache_runtime(device, memory_allocator, scheduler, staging_pool,
|
||||
update_descriptor_queue, descriptor_pool),
|
||||
buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime),
|
||||
pipeline_cache(*this, maxwell3d, kepler_compute, gpu_memory, device, scheduler,
|
||||
descriptor_pool, update_descriptor_queue, render_pass_cache, buffer_cache,
|
||||
texture_cache, gpu.ShaderNotify()),
|
||||
query_cache{*this, maxwell3d, gpu_memory, device, scheduler}, accelerate_dma{buffer_cache},
|
||||
buffer_cache(*this, cpu_memory_, buffer_cache_runtime),
|
||||
pipeline_cache(*this, device, scheduler, descriptor_pool, update_descriptor_queue,
|
||||
render_pass_cache, buffer_cache, texture_cache, gpu.ShaderNotify()),
|
||||
query_cache{*this, device, scheduler}, accelerate_dma{buffer_cache},
|
||||
fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler),
|
||||
wfi_event(device.GetLogical().CreateEvent()) {
|
||||
scheduler.SetQueryCache(query_cache);
|
||||
|
@ -199,8 +196,8 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
|
|||
|
||||
UpdateDynamicStates();
|
||||
|
||||
const auto& regs{maxwell3d.regs};
|
||||
const u32 num_instances{maxwell3d.mme_draw.instance_count};
|
||||
const auto& regs{maxwell3d->regs};
|
||||
const u32 num_instances{maxwell3d->mme_draw.instance_count};
|
||||
const DrawParams draw_params{MakeDrawParams(regs, num_instances, is_instanced, is_indexed)};
|
||||
scheduler.Record([draw_params](vk::CommandBuffer cmdbuf) {
|
||||
if (draw_params.is_indexed) {
|
||||
|
@ -218,14 +215,14 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
|
|||
void RasterizerVulkan::Clear() {
|
||||
MICROPROFILE_SCOPE(Vulkan_Clearing);
|
||||
|
||||
if (!maxwell3d.ShouldExecute()) {
|
||||
if (!maxwell3d->ShouldExecute()) {
|
||||
return;
|
||||
}
|
||||
FlushWork();
|
||||
|
||||
query_cache.UpdateCounters();
|
||||
|
||||
auto& regs = maxwell3d.regs;
|
||||
auto& regs = maxwell3d->regs;
|
||||
const bool use_color = regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B ||
|
||||
regs.clear_buffers.A;
|
||||
const bool use_depth = regs.clear_buffers.Z;
|
||||
|
@ -339,9 +336,9 @@ void RasterizerVulkan::DispatchCompute() {
|
|||
return;
|
||||
}
|
||||
std::scoped_lock lock{texture_cache.mutex, buffer_cache.mutex};
|
||||
pipeline->Configure(kepler_compute, gpu_memory, scheduler, buffer_cache, texture_cache);
|
||||
pipeline->Configure(*kepler_compute, *gpu_memory, scheduler, buffer_cache, texture_cache);
|
||||
|
||||
const auto& qmd{kepler_compute.launch_description};
|
||||
const auto& qmd{kepler_compute->launch_description};
|
||||
const std::array<u32, 3> dim{qmd.grid_dim_x, qmd.grid_dim_y, qmd.grid_dim_z};
|
||||
scheduler.RequestOutsideRenderPassOperationContext();
|
||||
scheduler.Record([dim](vk::CommandBuffer cmdbuf) { cmdbuf.Dispatch(dim[0], dim[1], dim[2]); });
|
||||
|
@ -451,10 +448,11 @@ void RasterizerVulkan::ModifyGPUMemory(GPUVAddr addr, u64 size) {
|
|||
|
||||
void RasterizerVulkan::SignalSemaphore(GPUVAddr addr, u32 value) {
|
||||
if (!gpu.IsAsync()) {
|
||||
gpu_memory.Write<u32>(addr, value);
|
||||
gpu_memory->Write<u32>(addr, value);
|
||||
return;
|
||||
}
|
||||
fence_manager.SignalSemaphore(addr, value);
|
||||
auto paddr = gpu_memory->GetPointer(addr);
|
||||
fence_manager.SignalSemaphore(paddr, value);
|
||||
}
|
||||
|
||||
void RasterizerVulkan::SignalSyncPoint(u32 value) {
|
||||
|
@ -553,12 +551,12 @@ Tegra::Engines::AccelerateDMAInterface& RasterizerVulkan::AccessAccelerateDMA()
|
|||
|
||||
void RasterizerVulkan::AccelerateInlineToMemory(GPUVAddr address, size_t copy_size,
|
||||
std::span<u8> memory) {
|
||||
auto cpu_addr = gpu_memory.GpuToCpuAddress(address);
|
||||
auto cpu_addr = gpu_memory->GpuToCpuAddress(address);
|
||||
if (!cpu_addr) [[unlikely]] {
|
||||
gpu_memory.WriteBlock(address, memory.data(), copy_size);
|
||||
gpu_memory->WriteBlock(address, memory.data(), copy_size);
|
||||
return;
|
||||
}
|
||||
gpu_memory.WriteBlockUnsafe(address, memory.data(), copy_size);
|
||||
gpu_memory->WriteBlockUnsafe(address, memory.data(), copy_size);
|
||||
{
|
||||
std::unique_lock<std::mutex> lock{buffer_cache.mutex};
|
||||
if (!buffer_cache.InlineMemory(*cpu_addr, copy_size, memory)) {
|
||||
|
@ -627,7 +625,7 @@ bool AccelerateDMA::BufferCopy(GPUVAddr src_address, GPUVAddr dest_address, u64
|
|||
}
|
||||
|
||||
void RasterizerVulkan::UpdateDynamicStates() {
|
||||
auto& regs = maxwell3d.regs;
|
||||
auto& regs = maxwell3d->regs;
|
||||
UpdateViewportsState(regs);
|
||||
UpdateScissorsState(regs);
|
||||
UpdateDepthBias(regs);
|
||||
|
@ -651,7 +649,7 @@ void RasterizerVulkan::UpdateDynamicStates() {
|
|||
}
|
||||
|
||||
void RasterizerVulkan::BeginTransformFeedback() {
|
||||
const auto& regs = maxwell3d.regs;
|
||||
const auto& regs = maxwell3d->regs;
|
||||
if (regs.tfb_enabled == 0) {
|
||||
return;
|
||||
}
|
||||
|
@ -667,7 +665,7 @@ void RasterizerVulkan::BeginTransformFeedback() {
|
|||
}
|
||||
|
||||
void RasterizerVulkan::EndTransformFeedback() {
|
||||
const auto& regs = maxwell3d.regs;
|
||||
const auto& regs = maxwell3d->regs;
|
||||
if (regs.tfb_enabled == 0) {
|
||||
return;
|
||||
}
|
||||
|
@ -917,7 +915,7 @@ void RasterizerVulkan::UpdateStencilTestEnable(Tegra::Engines::Maxwell3D::Regs&
|
|||
}
|
||||
|
||||
void RasterizerVulkan::UpdateVertexInput(Tegra::Engines::Maxwell3D::Regs& regs) {
|
||||
auto& dirty{maxwell3d.dirty.flags};
|
||||
auto& dirty{maxwell3d->dirty.flags};
|
||||
if (!dirty[Dirty::VertexInput]) {
|
||||
return;
|
||||
}
|
||||
|
@ -974,4 +972,41 @@ void RasterizerVulkan::UpdateVertexInput(Tegra::Engines::Maxwell3D::Regs& regs)
|
|||
});
|
||||
}
|
||||
|
||||
void RasterizerVulkan::InitializeChannel(Tegra::Control::ChannelState& channel) {
|
||||
CreateChannel(channel);
|
||||
{
|
||||
std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
|
||||
texture_cache.CreateChannel(channel);
|
||||
buffer_cache.CreateChannel(channel);
|
||||
}
|
||||
pipeline_cache.CreateChannel(channel);
|
||||
query_cache.CreateChannel(channel);
|
||||
state_tracker.SetupTables(channel);
|
||||
}
|
||||
|
||||
void RasterizerVulkan::BindChannel(Tegra::Control::ChannelState& channel) {
|
||||
const s32 channel_id = channel.bind_id;
|
||||
BindToChannel(channel_id);
|
||||
{
|
||||
std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
|
||||
texture_cache.BindToChannel(channel_id);
|
||||
buffer_cache.BindToChannel(channel_id);
|
||||
}
|
||||
pipeline_cache.BindToChannel(channel_id);
|
||||
query_cache.BindToChannel(channel_id);
|
||||
state_tracker.ChangeChannel(channel);
|
||||
scheduler.InvalidateState();
|
||||
}
|
||||
|
||||
void RasterizerVulkan::ReleaseChannel(s32 channel_id) {
|
||||
EraseChannel(channel_id);
|
||||
{
|
||||
std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
|
||||
texture_cache.EraseChannel(channel_id);
|
||||
buffer_cache.EraseChannel(channel_id);
|
||||
}
|
||||
pipeline_cache.EraseChannel(channel_id);
|
||||
query_cache.EraseChannel(channel_id);
|
||||
}
|
||||
|
||||
} // namespace Vulkan
|
||||
|
|
|
@ -8,6 +8,7 @@
|
|||
#include <boost/container/static_vector.hpp>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/control/channel_state_cache.h"
|
||||
#include "video_core/engines/maxwell_dma.h"
|
||||
#include "video_core/rasterizer_accelerated.h"
|
||||
#include "video_core/rasterizer_interface.h"
|
||||
|
@ -54,13 +55,13 @@ private:
|
|||
BufferCache& buffer_cache;
|
||||
};
|
||||
|
||||
class RasterizerVulkan final : public VideoCore::RasterizerAccelerated {
|
||||
class RasterizerVulkan final : public VideoCore::RasterizerAccelerated,
|
||||
protected VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> {
|
||||
public:
|
||||
explicit RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
|
||||
Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_,
|
||||
ScreenInfo& screen_info_, const Device& device_,
|
||||
MemoryAllocator& memory_allocator_, StateTracker& state_tracker_,
|
||||
Scheduler& scheduler_);
|
||||
Core::Memory::Memory& cpu_memory_, ScreenInfo& screen_info_,
|
||||
const Device& device_, MemoryAllocator& memory_allocator_,
|
||||
StateTracker& state_tracker_, Scheduler& scheduler_);
|
||||
~RasterizerVulkan() override;
|
||||
|
||||
void Draw(bool is_indexed, bool is_instanced) override;
|
||||
|
@ -99,6 +100,12 @@ public:
|
|||
void LoadDiskResources(u64 title_id, std::stop_token stop_loading,
|
||||
const VideoCore::DiskResourceLoadCallback& callback) override;
|
||||
|
||||
void InitializeChannel(Tegra::Control::ChannelState& channel) override;
|
||||
|
||||
void BindChannel(Tegra::Control::ChannelState& channel) override;
|
||||
|
||||
void ReleaseChannel(s32 channel_id) override;
|
||||
|
||||
private:
|
||||
static constexpr size_t MAX_TEXTURES = 192;
|
||||
static constexpr size_t MAX_IMAGES = 48;
|
||||
|
@ -134,9 +141,6 @@ private:
|
|||
void UpdateVertexInput(Tegra::Engines::Maxwell3D::Regs& regs);
|
||||
|
||||
Tegra::GPU& gpu;
|
||||
Tegra::MemoryManager& gpu_memory;
|
||||
Tegra::Engines::Maxwell3D& maxwell3d;
|
||||
Tegra::Engines::KeplerCompute& kepler_compute;
|
||||
|
||||
ScreenInfo& screen_info;
|
||||
const Device& device;
|
||||
|
|
|
@ -7,6 +7,7 @@
|
|||
|
||||
#include "common/common_types.h"
|
||||
#include "core/core.h"
|
||||
#include "video_core/control/channel_state.h"
|
||||
#include "video_core/dirty_flags.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/gpu.h"
|
||||
|
@ -174,9 +175,8 @@ void SetupDirtyVertexBindings(Tables& tables) {
|
|||
}
|
||||
} // Anonymous namespace
|
||||
|
||||
StateTracker::StateTracker(Tegra::GPU& gpu)
|
||||
: flags{gpu.Maxwell3D().dirty.flags}, invalidation_flags{MakeInvalidationFlags()} {
|
||||
auto& tables{gpu.Maxwell3D().dirty.tables};
|
||||
void StateTracker::SetupTables(Tegra::Control::ChannelState& channel_state) {
|
||||
auto& tables{channel_state.maxwell_3d->dirty.tables};
|
||||
SetupDirtyFlags(tables);
|
||||
SetupDirtyViewports(tables);
|
||||
SetupDirtyScissors(tables);
|
||||
|
@ -199,4 +199,11 @@ StateTracker::StateTracker(Tegra::GPU& gpu)
|
|||
SetupDirtyVertexBindings(tables);
|
||||
}
|
||||
|
||||
void StateTracker::ChangeChannel(Tegra::Control::ChannelState& channel_state) {
|
||||
flags = &channel_state.maxwell_3d->dirty.flags;
|
||||
}
|
||||
|
||||
StateTracker::StateTracker(Tegra::GPU& gpu)
|
||||
: flags{}, invalidation_flags{MakeInvalidationFlags()} {}
|
||||
|
||||
} // namespace Vulkan
|
||||
|
|
|
@ -10,6 +10,12 @@
|
|||
#include "video_core/dirty_flags.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
|
||||
namespace Tegra {
|
||||
namespace Control {
|
||||
struct ChannelState;
|
||||
}
|
||||
} // namespace Tegra
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
namespace Dirty {
|
||||
|
@ -56,16 +62,16 @@ public:
|
|||
explicit StateTracker(Tegra::GPU& gpu);
|
||||
|
||||
void InvalidateCommandBufferState() {
|
||||
flags |= invalidation_flags;
|
||||
(*flags) |= invalidation_flags;
|
||||
current_topology = INVALID_TOPOLOGY;
|
||||
}
|
||||
|
||||
void InvalidateViewports() {
|
||||
flags[Dirty::Viewports] = true;
|
||||
(*flags)[Dirty::Viewports] = true;
|
||||
}
|
||||
|
||||
void InvalidateScissors() {
|
||||
flags[Dirty::Scissors] = true;
|
||||
(*flags)[Dirty::Scissors] = true;
|
||||
}
|
||||
|
||||
bool TouchViewports() {
|
||||
|
@ -139,16 +145,20 @@ public:
|
|||
return has_changed;
|
||||
}
|
||||
|
||||
void SetupTables(Tegra::Control::ChannelState& channel_state);
|
||||
|
||||
void ChangeChannel(Tegra::Control::ChannelState& channel_state);
|
||||
|
||||
private:
|
||||
static constexpr auto INVALID_TOPOLOGY = static_cast<Maxwell::PrimitiveTopology>(~0u);
|
||||
|
||||
bool Exchange(std::size_t id, bool new_value) const noexcept {
|
||||
const bool is_dirty = flags[id];
|
||||
flags[id] = new_value;
|
||||
const bool is_dirty = (*flags)[id];
|
||||
(*flags)[id] = new_value;
|
||||
return is_dirty;
|
||||
}
|
||||
|
||||
Tegra::Engines::Maxwell3D::DirtyState::Flags& flags;
|
||||
Tegra::Engines::Maxwell3D::DirtyState::Flags* flags;
|
||||
Tegra::Engines::Maxwell3D::DirtyState::Flags invalidation_flags;
|
||||
Maxwell::PrimitiveTopology current_topology = INVALID_TOPOLOGY;
|
||||
};
|
||||
|
|
|
@ -8,6 +8,7 @@
|
|||
#include "common/assert.h"
|
||||
#include "shader_recompiler/frontend/maxwell/control_flow.h"
|
||||
#include "shader_recompiler/object_pool.h"
|
||||
#include "video_core/control/channel_state.h"
|
||||
#include "video_core/dirty_flags.h"
|
||||
#include "video_core/engines/kepler_compute.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
|
@ -33,29 +34,25 @@ void ShaderCache::SyncGuestHost() {
|
|||
RemovePendingShaders();
|
||||
}
|
||||
|
||||
ShaderCache::ShaderCache(VideoCore::RasterizerInterface& rasterizer_,
|
||||
Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_,
|
||||
Tegra::Engines::KeplerCompute& kepler_compute_)
|
||||
: gpu_memory{gpu_memory_}, maxwell3d{maxwell3d_}, kepler_compute{kepler_compute_},
|
||||
rasterizer{rasterizer_} {}
|
||||
ShaderCache::ShaderCache(VideoCore::RasterizerInterface& rasterizer_) : rasterizer{rasterizer_} {}
|
||||
|
||||
bool ShaderCache::RefreshStages(std::array<u64, 6>& unique_hashes) {
|
||||
auto& dirty{maxwell3d.dirty.flags};
|
||||
auto& dirty{maxwell3d->dirty.flags};
|
||||
if (!dirty[VideoCommon::Dirty::Shaders]) {
|
||||
return last_shaders_valid;
|
||||
}
|
||||
dirty[VideoCommon::Dirty::Shaders] = false;
|
||||
|
||||
const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()};
|
||||
const GPUVAddr base_addr{maxwell3d->regs.code_address.CodeAddress()};
|
||||
for (size_t index = 0; index < Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram; ++index) {
|
||||
if (!maxwell3d.regs.IsShaderConfigEnabled(index)) {
|
||||
if (!maxwell3d->regs.IsShaderConfigEnabled(index)) {
|
||||
unique_hashes[index] = 0;
|
||||
continue;
|
||||
}
|
||||
const auto& shader_config{maxwell3d.regs.shader_config[index]};
|
||||
const auto& shader_config{maxwell3d->regs.shader_config[index]};
|
||||
const auto program{static_cast<Tegra::Engines::Maxwell3D::Regs::ShaderProgram>(index)};
|
||||
const GPUVAddr shader_addr{base_addr + shader_config.offset};
|
||||
const std::optional<VAddr> cpu_shader_addr{gpu_memory.GpuToCpuAddress(shader_addr)};
|
||||
const std::optional<VAddr> cpu_shader_addr{gpu_memory->GpuToCpuAddress(shader_addr)};
|
||||
if (!cpu_shader_addr) {
|
||||
LOG_ERROR(HW_GPU, "Invalid GPU address for shader 0x{:016x}", shader_addr);
|
||||
last_shaders_valid = false;
|
||||
|
@ -64,7 +61,7 @@ bool ShaderCache::RefreshStages(std::array<u64, 6>& unique_hashes) {
|
|||
const ShaderInfo* shader_info{TryGet(*cpu_shader_addr)};
|
||||
if (!shader_info) {
|
||||
const u32 start_address{shader_config.offset};
|
||||
GraphicsEnvironment env{maxwell3d, gpu_memory, program, base_addr, start_address};
|
||||
GraphicsEnvironment env{*maxwell3d, *gpu_memory, program, base_addr, start_address};
|
||||
shader_info = MakeShaderInfo(env, *cpu_shader_addr);
|
||||
}
|
||||
shader_infos[index] = shader_info;
|
||||
|
@ -75,10 +72,10 @@ bool ShaderCache::RefreshStages(std::array<u64, 6>& unique_hashes) {
|
|||
}
|
||||
|
||||
const ShaderInfo* ShaderCache::ComputeShader() {
|
||||
const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()};
|
||||
const auto& qmd{kepler_compute.launch_description};
|
||||
const GPUVAddr program_base{kepler_compute->regs.code_loc.Address()};
|
||||
const auto& qmd{kepler_compute->launch_description};
|
||||
const GPUVAddr shader_addr{program_base + qmd.program_start};
|
||||
const std::optional<VAddr> cpu_shader_addr{gpu_memory.GpuToCpuAddress(shader_addr)};
|
||||
const std::optional<VAddr> cpu_shader_addr{gpu_memory->GpuToCpuAddress(shader_addr)};
|
||||
if (!cpu_shader_addr) {
|
||||
LOG_ERROR(HW_GPU, "Invalid GPU address for shader 0x{:016x}", shader_addr);
|
||||
return nullptr;
|
||||
|
@ -86,22 +83,22 @@ const ShaderInfo* ShaderCache::ComputeShader() {
|
|||
if (const ShaderInfo* const shader = TryGet(*cpu_shader_addr)) {
|
||||
return shader;
|
||||
}
|
||||
ComputeEnvironment env{kepler_compute, gpu_memory, program_base, qmd.program_start};
|
||||
ComputeEnvironment env{*kepler_compute, *gpu_memory, program_base, qmd.program_start};
|
||||
return MakeShaderInfo(env, *cpu_shader_addr);
|
||||
}
|
||||
|
||||
void ShaderCache::GetGraphicsEnvironments(GraphicsEnvironments& result,
|
||||
const std::array<u64, NUM_PROGRAMS>& unique_hashes) {
|
||||
size_t env_index{};
|
||||
const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()};
|
||||
const GPUVAddr base_addr{maxwell3d->regs.code_address.CodeAddress()};
|
||||
for (size_t index = 0; index < NUM_PROGRAMS; ++index) {
|
||||
if (unique_hashes[index] == 0) {
|
||||
continue;
|
||||
}
|
||||
const auto program{static_cast<Tegra::Engines::Maxwell3D::Regs::ShaderProgram>(index)};
|
||||
auto& env{result.envs[index]};
|
||||
const u32 start_address{maxwell3d.regs.shader_config[index].offset};
|
||||
env = GraphicsEnvironment{maxwell3d, gpu_memory, program, base_addr, start_address};
|
||||
const u32 start_address{maxwell3d->regs.shader_config[index].offset};
|
||||
env = GraphicsEnvironment{*maxwell3d, *gpu_memory, program, base_addr, start_address};
|
||||
env.SetCachedSize(shader_infos[index]->size_bytes);
|
||||
result.env_ptrs[env_index++] = &env;
|
||||
}
|
||||
|
|
|
@ -12,6 +12,7 @@
|
|||
#include <vector>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/control/channel_state_cache.h"
|
||||
#include "video_core/rasterizer_interface.h"
|
||||
#include "video_core/shader_environment.h"
|
||||
|
||||
|
@ -19,6 +20,10 @@ namespace Tegra {
|
|||
class MemoryManager;
|
||||
}
|
||||
|
||||
namespace Tegra::Control {
|
||||
struct ChannelState;
|
||||
}
|
||||
|
||||
namespace VideoCommon {
|
||||
|
||||
class GenericEnvironment;
|
||||
|
@ -28,7 +33,7 @@ struct ShaderInfo {
|
|||
size_t size_bytes{};
|
||||
};
|
||||
|
||||
class ShaderCache {
|
||||
class ShaderCache : public VideoCommon::ChannelSetupCaches<VideoCommon::ChannelInfo> {
|
||||
static constexpr u64 YUZU_PAGEBITS = 14;
|
||||
static constexpr u64 YUZU_PAGESIZE = u64(1) << YUZU_PAGEBITS;
|
||||
|
||||
|
@ -71,9 +76,7 @@ protected:
|
|||
}
|
||||
};
|
||||
|
||||
explicit ShaderCache(VideoCore::RasterizerInterface& rasterizer_,
|
||||
Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_,
|
||||
Tegra::Engines::KeplerCompute& kepler_compute_);
|
||||
explicit ShaderCache(VideoCore::RasterizerInterface& rasterizer_);
|
||||
|
||||
/// @brief Update the hashes and information of shader stages
|
||||
/// @param unique_hashes Shader hashes to store into when a stage is enabled
|
||||
|
@ -88,10 +91,6 @@ protected:
|
|||
void GetGraphicsEnvironments(GraphicsEnvironments& result,
|
||||
const std::array<u64, NUM_PROGRAMS>& unique_hashes);
|
||||
|
||||
Tegra::MemoryManager& gpu_memory;
|
||||
Tegra::Engines::Maxwell3D& maxwell3d;
|
||||
Tegra::Engines::KeplerCompute& kepler_compute;
|
||||
|
||||
std::array<const ShaderInfo*, NUM_PROGRAMS> shader_infos{};
|
||||
bool last_shaders_valid = false;
|
||||
|
||||
|
|
|
@ -88,6 +88,9 @@ struct ImageBase {
|
|||
u32 scale_rating = 0;
|
||||
u64 scale_tick = 0;
|
||||
bool has_scaled = false;
|
||||
|
||||
size_t channel = 0;
|
||||
|
||||
ImageFlagBits flags = ImageFlagBits::CpuModified;
|
||||
|
||||
GPUVAddr gpu_addr = 0;
|
||||
|
|
|
@ -7,6 +7,7 @@
|
|||
|
||||
#include "common/alignment.h"
|
||||
#include "common/settings.h"
|
||||
#include "video_core/control/channel_state.h"
|
||||
#include "video_core/dirty_flags.h"
|
||||
#include "video_core/engines/kepler_compute.h"
|
||||
#include "video_core/texture_cache/image_view_base.h"
|
||||
|
@ -29,12 +30,8 @@ using VideoCore::Surface::SurfaceType;
|
|||
using namespace Common::Literals;
|
||||
|
||||
template <class P>
|
||||
TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& rasterizer_,
|
||||
Tegra::Engines::Maxwell3D& maxwell3d_,
|
||||
Tegra::Engines::KeplerCompute& kepler_compute_,
|
||||
Tegra::MemoryManager& gpu_memory_)
|
||||
: runtime{runtime_}, rasterizer{rasterizer_}, maxwell3d{maxwell3d_},
|
||||
kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_} {
|
||||
TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& rasterizer_)
|
||||
: runtime{runtime_}, rasterizer{rasterizer_} {
|
||||
// Configure null sampler
|
||||
TSCEntry sampler_descriptor{};
|
||||
sampler_descriptor.min_filter.Assign(Tegra::Texture::TextureFilter::Linear);
|
||||
|
@ -42,6 +39,13 @@ TextureCache<P>::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface&
|
|||
sampler_descriptor.mipmap_filter.Assign(Tegra::Texture::TextureMipmapFilter::Linear);
|
||||
sampler_descriptor.cubemap_anisotropy.Assign(1);
|
||||
|
||||
// Setup channels
|
||||
current_channel_id = UNSET_CHANNEL;
|
||||
state = nullptr;
|
||||
maxwell3d = nullptr;
|
||||
kepler_compute = nullptr;
|
||||
gpu_memory = nullptr;
|
||||
|
||||
// Make sure the first index is reserved for the null resources
|
||||
// This way the null resource becomes a compile time constant
|
||||
void(slot_images.insert(NullImageParams{}));
|
||||
|
@ -93,7 +97,7 @@ void TextureCache<P>::RunGarbageCollector() {
|
|||
const auto copies = FullDownloadCopies(image.info);
|
||||
image.DownloadMemory(map, copies);
|
||||
runtime.Finish();
|
||||
SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span);
|
||||
SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span);
|
||||
}
|
||||
if (True(image.flags & ImageFlagBits::Tracked)) {
|
||||
UntrackImage(image, image_id);
|
||||
|
@ -152,22 +156,23 @@ void TextureCache<P>::MarkModification(ImageId id) noexcept {
|
|||
template <class P>
|
||||
template <bool has_blacklists>
|
||||
void TextureCache<P>::FillGraphicsImageViews(std::span<ImageViewInOut> views) {
|
||||
FillImageViews<has_blacklists>(graphics_image_table, graphics_image_view_ids, views);
|
||||
FillImageViews<has_blacklists>(state->graphics_image_table, state->graphics_image_view_ids,
|
||||
views);
|
||||
}
|
||||
|
||||
template <class P>
|
||||
void TextureCache<P>::FillComputeImageViews(std::span<ImageViewInOut> views) {
|
||||
FillImageViews<true>(compute_image_table, compute_image_view_ids, views);
|
||||
FillImageViews<true>(state->compute_image_table, state->compute_image_view_ids, views);
|
||||
}
|
||||
|
||||
template <class P>
|
||||
typename P::Sampler* TextureCache<P>::GetGraphicsSampler(u32 index) {
|
||||
if (index > graphics_sampler_table.Limit()) {
|
||||
if (index > state->graphics_sampler_table.Limit()) {
|
||||
LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index);
|
||||
return &slot_samplers[NULL_SAMPLER_ID];
|
||||
}
|
||||
const auto [descriptor, is_new] = graphics_sampler_table.Read(index);
|
||||
SamplerId& id = graphics_sampler_ids[index];
|
||||
const auto [descriptor, is_new] = state->graphics_sampler_table.Read(index);
|
||||
SamplerId& id = state->graphics_sampler_ids[index];
|
||||
if (is_new) {
|
||||
id = FindSampler(descriptor);
|
||||
}
|
||||
|
@ -176,12 +181,12 @@ typename P::Sampler* TextureCache<P>::GetGraphicsSampler(u32 index) {
|
|||
|
||||
template <class P>
|
||||
typename P::Sampler* TextureCache<P>::GetComputeSampler(u32 index) {
|
||||
if (index > compute_sampler_table.Limit()) {
|
||||
if (index > state->compute_sampler_table.Limit()) {
|
||||
LOG_DEBUG(HW_GPU, "Invalid sampler index={}", index);
|
||||
return &slot_samplers[NULL_SAMPLER_ID];
|
||||
}
|
||||
const auto [descriptor, is_new] = compute_sampler_table.Read(index);
|
||||
SamplerId& id = compute_sampler_ids[index];
|
||||
const auto [descriptor, is_new] = state->compute_sampler_table.Read(index);
|
||||
SamplerId& id = state->compute_sampler_ids[index];
|
||||
if (is_new) {
|
||||
id = FindSampler(descriptor);
|
||||
}
|
||||
|
@ -191,34 +196,34 @@ typename P::Sampler* TextureCache<P>::GetComputeSampler(u32 index) {
|
|||
template <class P>
|
||||
void TextureCache<P>::SynchronizeGraphicsDescriptors() {
|
||||
using SamplerIndex = Tegra::Engines::Maxwell3D::Regs::SamplerIndex;
|
||||
const bool linked_tsc = maxwell3d.regs.sampler_index == SamplerIndex::ViaHeaderIndex;
|
||||
const u32 tic_limit = maxwell3d.regs.tic.limit;
|
||||
const u32 tsc_limit = linked_tsc ? tic_limit : maxwell3d.regs.tsc.limit;
|
||||
if (graphics_sampler_table.Synchornize(maxwell3d.regs.tsc.Address(), tsc_limit)) {
|
||||
graphics_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID);
|
||||
const bool linked_tsc = maxwell3d->regs.sampler_index == SamplerIndex::ViaHeaderIndex;
|
||||
const u32 tic_limit = maxwell3d->regs.tic.limit;
|
||||
const u32 tsc_limit = linked_tsc ? tic_limit : maxwell3d->regs.tsc.limit;
|
||||
if (state->graphics_sampler_table.Synchornize(maxwell3d->regs.tsc.Address(), tsc_limit)) {
|
||||
state->graphics_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID);
|
||||
}
|
||||
if (graphics_image_table.Synchornize(maxwell3d.regs.tic.Address(), tic_limit)) {
|
||||
graphics_image_view_ids.resize(tic_limit + 1, CORRUPT_ID);
|
||||
if (state->graphics_image_table.Synchornize(maxwell3d->regs.tic.Address(), tic_limit)) {
|
||||
state->graphics_image_view_ids.resize(tic_limit + 1, CORRUPT_ID);
|
||||
}
|
||||
}
|
||||
|
||||
template <class P>
|
||||
void TextureCache<P>::SynchronizeComputeDescriptors() {
|
||||
const bool linked_tsc = kepler_compute.launch_description.linked_tsc;
|
||||
const u32 tic_limit = kepler_compute.regs.tic.limit;
|
||||
const u32 tsc_limit = linked_tsc ? tic_limit : kepler_compute.regs.tsc.limit;
|
||||
const GPUVAddr tsc_gpu_addr = kepler_compute.regs.tsc.Address();
|
||||
if (compute_sampler_table.Synchornize(tsc_gpu_addr, tsc_limit)) {
|
||||
compute_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID);
|
||||
const bool linked_tsc = kepler_compute->launch_description.linked_tsc;
|
||||
const u32 tic_limit = kepler_compute->regs.tic.limit;
|
||||
const u32 tsc_limit = linked_tsc ? tic_limit : kepler_compute->regs.tsc.limit;
|
||||
const GPUVAddr tsc_gpu_addr = kepler_compute->regs.tsc.Address();
|
||||
if (state->compute_sampler_table.Synchornize(tsc_gpu_addr, tsc_limit)) {
|
||||
state->compute_sampler_ids.resize(tsc_limit + 1, CORRUPT_ID);
|
||||
}
|
||||
if (compute_image_table.Synchornize(kepler_compute.regs.tic.Address(), tic_limit)) {
|
||||
compute_image_view_ids.resize(tic_limit + 1, CORRUPT_ID);
|
||||
if (state->compute_image_table.Synchornize(kepler_compute->regs.tic.Address(), tic_limit)) {
|
||||
state->compute_image_view_ids.resize(tic_limit + 1, CORRUPT_ID);
|
||||
}
|
||||
}
|
||||
|
||||
template <class P>
|
||||
bool TextureCache<P>::RescaleRenderTargets(bool is_clear) {
|
||||
auto& flags = maxwell3d.dirty.flags;
|
||||
auto& flags = maxwell3d->dirty.flags;
|
||||
u32 scale_rating = 0;
|
||||
bool rescaled = false;
|
||||
std::array<ImageId, NUM_RT> tmp_color_images{};
|
||||
|
@ -315,7 +320,7 @@ bool TextureCache<P>::RescaleRenderTargets(bool is_clear) {
|
|||
template <class P>
|
||||
void TextureCache<P>::UpdateRenderTargets(bool is_clear) {
|
||||
using namespace VideoCommon::Dirty;
|
||||
auto& flags = maxwell3d.dirty.flags;
|
||||
auto& flags = maxwell3d->dirty.flags;
|
||||
if (!flags[Dirty::RenderTargets]) {
|
||||
for (size_t index = 0; index < NUM_RT; ++index) {
|
||||
ImageViewId& color_buffer_id = render_targets.color_buffer_ids[index];
|
||||
|
@ -342,7 +347,7 @@ void TextureCache<P>::UpdateRenderTargets(bool is_clear) {
|
|||
PrepareImageView(depth_buffer_id, true, is_clear && IsFullClear(depth_buffer_id));
|
||||
|
||||
for (size_t index = 0; index < NUM_RT; ++index) {
|
||||
render_targets.draw_buffers[index] = static_cast<u8>(maxwell3d.regs.rt_control.Map(index));
|
||||
render_targets.draw_buffers[index] = static_cast<u8>(maxwell3d->regs.rt_control.Map(index));
|
||||
}
|
||||
u32 up_scale = 1;
|
||||
u32 down_shift = 0;
|
||||
|
@ -351,8 +356,8 @@ void TextureCache<P>::UpdateRenderTargets(bool is_clear) {
|
|||
down_shift = Settings::values.resolution_info.down_shift;
|
||||
}
|
||||
render_targets.size = Extent2D{
|
||||
(maxwell3d.regs.render_area.width * up_scale) >> down_shift,
|
||||
(maxwell3d.regs.render_area.height * up_scale) >> down_shift,
|
||||
(maxwell3d->regs.render_area.width * up_scale) >> down_shift,
|
||||
(maxwell3d->regs.render_area.height * up_scale) >> down_shift,
|
||||
};
|
||||
|
||||
flags[Dirty::DepthBiasGlobal] = true;
|
||||
|
@ -458,7 +463,7 @@ void TextureCache<P>::DownloadMemory(VAddr cpu_addr, size_t size) {
|
|||
const auto copies = FullDownloadCopies(image.info);
|
||||
image.DownloadMemory(map, copies);
|
||||
runtime.Finish();
|
||||
SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span);
|
||||
SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -655,7 +660,7 @@ void TextureCache<P>::PopAsyncFlushes() {
|
|||
for (const ImageId image_id : download_ids) {
|
||||
const ImageBase& image = slot_images[image_id];
|
||||
const auto copies = FullDownloadCopies(image.info);
|
||||
SwizzleImage(gpu_memory, image.gpu_addr, image.info, copies, download_span);
|
||||
SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, download_span);
|
||||
download_map.offset += image.unswizzled_size_bytes;
|
||||
download_span = download_span.subspan(image.unswizzled_size_bytes);
|
||||
}
|
||||
|
@ -714,26 +719,26 @@ void TextureCache<P>::UploadImageContents(Image& image, StagingBuffer& staging)
|
|||
const GPUVAddr gpu_addr = image.gpu_addr;
|
||||
|
||||
if (True(image.flags & ImageFlagBits::AcceleratedUpload)) {
|
||||
gpu_memory.ReadBlockUnsafe(gpu_addr, mapped_span.data(), mapped_span.size_bytes());
|
||||
gpu_memory->ReadBlockUnsafe(gpu_addr, mapped_span.data(), mapped_span.size_bytes());
|
||||
const auto uploads = FullUploadSwizzles(image.info);
|
||||
runtime.AccelerateImageUpload(image, staging, uploads);
|
||||
} else if (True(image.flags & ImageFlagBits::Converted)) {
|
||||
std::vector<u8> unswizzled_data(image.unswizzled_size_bytes);
|
||||
auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, unswizzled_data);
|
||||
auto copies = UnswizzleImage(*gpu_memory, gpu_addr, image.info, unswizzled_data);
|
||||
ConvertImage(unswizzled_data, image.info, mapped_span, copies);
|
||||
image.UploadMemory(staging, copies);
|
||||
} else {
|
||||
const auto copies = UnswizzleImage(gpu_memory, gpu_addr, image.info, mapped_span);
|
||||
const auto copies = UnswizzleImage(*gpu_memory, gpu_addr, image.info, mapped_span);
|
||||
image.UploadMemory(staging, copies);
|
||||
}
|
||||
}
|
||||
|
||||
template <class P>
|
||||
ImageViewId TextureCache<P>::FindImageView(const TICEntry& config) {
|
||||
if (!IsValidEntry(gpu_memory, config)) {
|
||||
if (!IsValidEntry(*gpu_memory, config)) {
|
||||
return NULL_IMAGE_VIEW_ID;
|
||||
}
|
||||
const auto [pair, is_new] = image_views.try_emplace(config);
|
||||
const auto [pair, is_new] = state->image_views.try_emplace(config);
|
||||
ImageViewId& image_view_id = pair->second;
|
||||
if (is_new) {
|
||||
image_view_id = CreateImageView(config);
|
||||
|
@ -777,9 +782,9 @@ ImageId TextureCache<P>::FindOrInsertImage(const ImageInfo& info, GPUVAddr gpu_a
|
|||
template <class P>
|
||||
ImageId TextureCache<P>::FindImage(const ImageInfo& info, GPUVAddr gpu_addr,
|
||||
RelaxedOptions options) {
|
||||
std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
|
||||
std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
|
||||
if (!cpu_addr) {
|
||||
cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr, CalculateGuestSizeInBytes(info));
|
||||
cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr, CalculateGuestSizeInBytes(info));
|
||||
if (!cpu_addr) {
|
||||
return ImageId{};
|
||||
}
|
||||
|
@ -860,7 +865,7 @@ void TextureCache<P>::InvalidateScale(Image& image) {
|
|||
image.scale_tick = frame_tick + 1;
|
||||
}
|
||||
const std::span<const ImageViewId> image_view_ids = image.image_view_ids;
|
||||
auto& dirty = maxwell3d.dirty.flags;
|
||||
auto& dirty = maxwell3d->dirty.flags;
|
||||
dirty[Dirty::RenderTargets] = true;
|
||||
dirty[Dirty::ZetaBuffer] = true;
|
||||
for (size_t rt = 0; rt < NUM_RT; ++rt) {
|
||||
|
@ -881,11 +886,11 @@ void TextureCache<P>::InvalidateScale(Image& image) {
|
|||
image.image_view_ids.clear();
|
||||
image.image_view_infos.clear();
|
||||
if constexpr (ENABLE_VALIDATION) {
|
||||
std::ranges::fill(graphics_image_view_ids, CORRUPT_ID);
|
||||
std::ranges::fill(compute_image_view_ids, CORRUPT_ID);
|
||||
std::ranges::fill(state->graphics_image_view_ids, CORRUPT_ID);
|
||||
std::ranges::fill(state->compute_image_view_ids, CORRUPT_ID);
|
||||
}
|
||||
graphics_image_table.Invalidate();
|
||||
compute_image_table.Invalidate();
|
||||
state->graphics_image_table.Invalidate();
|
||||
state->compute_image_table.Invalidate();
|
||||
has_deleted_images = true;
|
||||
}
|
||||
|
||||
|
@ -929,10 +934,10 @@ bool TextureCache<P>::ScaleDown(Image& image) {
|
|||
template <class P>
|
||||
ImageId TextureCache<P>::InsertImage(const ImageInfo& info, GPUVAddr gpu_addr,
|
||||
RelaxedOptions options) {
|
||||
std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
|
||||
std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
|
||||
if (!cpu_addr) {
|
||||
const auto size = CalculateGuestSizeInBytes(info);
|
||||
cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr, size);
|
||||
cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr, size);
|
||||
if (!cpu_addr) {
|
||||
const VAddr fake_addr = ~(1ULL << 40ULL) + virtual_invalid_space;
|
||||
virtual_invalid_space += Common::AlignUp(size, 32);
|
||||
|
@ -1050,7 +1055,7 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA
|
|||
const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr);
|
||||
Image& new_image = slot_images[new_image_id];
|
||||
|
||||
if (!gpu_memory.IsContinousRange(new_image.gpu_addr, new_image.guest_size_bytes)) {
|
||||
if (!gpu_memory->IsContinousRange(new_image.gpu_addr, new_image.guest_size_bytes)) {
|
||||
new_image.flags |= ImageFlagBits::Sparse;
|
||||
}
|
||||
|
||||
|
@ -1192,7 +1197,7 @@ SamplerId TextureCache<P>::FindSampler(const TSCEntry& config) {
|
|||
if (std::ranges::all_of(config.raw, [](u64 value) { return value == 0; })) {
|
||||
return NULL_SAMPLER_ID;
|
||||
}
|
||||
const auto [pair, is_new] = samplers.try_emplace(config);
|
||||
const auto [pair, is_new] = state->samplers.try_emplace(config);
|
||||
if (is_new) {
|
||||
pair->second = slot_samplers.insert(runtime, config);
|
||||
}
|
||||
|
@ -1201,7 +1206,7 @@ SamplerId TextureCache<P>::FindSampler(const TSCEntry& config) {
|
|||
|
||||
template <class P>
|
||||
ImageViewId TextureCache<P>::FindColorBuffer(size_t index, bool is_clear) {
|
||||
const auto& regs = maxwell3d.regs;
|
||||
const auto& regs = maxwell3d->regs;
|
||||
if (index >= regs.rt_control.count) {
|
||||
return ImageViewId{};
|
||||
}
|
||||
|
@ -1219,7 +1224,7 @@ ImageViewId TextureCache<P>::FindColorBuffer(size_t index, bool is_clear) {
|
|||
|
||||
template <class P>
|
||||
ImageViewId TextureCache<P>::FindDepthBuffer(bool is_clear) {
|
||||
const auto& regs = maxwell3d.regs;
|
||||
const auto& regs = maxwell3d->regs;
|
||||
if (!regs.zeta_enable) {
|
||||
return ImageViewId{};
|
||||
}
|
||||
|
@ -1321,8 +1326,8 @@ void TextureCache<P>::ForEachImageInRegionGPU(GPUVAddr gpu_addr, size_t size, Fu
|
|||
static constexpr bool BOOL_BREAK = std::is_same_v<FuncReturn, bool>;
|
||||
boost::container::small_vector<ImageId, 8> images;
|
||||
ForEachGPUPage(gpu_addr, size, [this, &images, gpu_addr, size, func](u64 page) {
|
||||
const auto it = gpu_page_table.find(page);
|
||||
if (it == gpu_page_table.end()) {
|
||||
const auto it = state->gpu_page_table.find(page);
|
||||
if (it == state->gpu_page_table.end()) {
|
||||
if constexpr (BOOL_BREAK) {
|
||||
return false;
|
||||
} else {
|
||||
|
@ -1403,9 +1408,9 @@ template <typename Func>
|
|||
void TextureCache<P>::ForEachSparseSegment(ImageBase& image, Func&& func) {
|
||||
using FuncReturn = typename std::invoke_result<Func, GPUVAddr, VAddr, size_t>::type;
|
||||
static constexpr bool RETURNS_BOOL = std::is_same_v<FuncReturn, bool>;
|
||||
const auto segments = gpu_memory.GetSubmappedRange(image.gpu_addr, image.guest_size_bytes);
|
||||
const auto segments = gpu_memory->GetSubmappedRange(image.gpu_addr, image.guest_size_bytes);
|
||||
for (const auto& [gpu_addr, size] : segments) {
|
||||
std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr);
|
||||
std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr);
|
||||
ASSERT(cpu_addr);
|
||||
if constexpr (RETURNS_BOOL) {
|
||||
if (func(gpu_addr, *cpu_addr, size)) {
|
||||
|
@ -1449,7 +1454,7 @@ void TextureCache<P>::RegisterImage(ImageId image_id) {
|
|||
image.lru_index = lru_cache.Insert(image_id, frame_tick);
|
||||
|
||||
ForEachGPUPage(image.gpu_addr, image.guest_size_bytes,
|
||||
[this, image_id](u64 page) { gpu_page_table[page].push_back(image_id); });
|
||||
[this, image_id](u64 page) { state->gpu_page_table[page].push_back(image_id); });
|
||||
if (False(image.flags & ImageFlagBits::Sparse)) {
|
||||
auto map_id =
|
||||
slot_map_views.insert(image.gpu_addr, image.cpu_addr, image.guest_size_bytes, image_id);
|
||||
|
@ -1497,8 +1502,9 @@ void TextureCache<P>::UnregisterImage(ImageId image_id) {
|
|||
}
|
||||
image_ids.erase(vector_it);
|
||||
};
|
||||
ForEachGPUPage(image.gpu_addr, image.guest_size_bytes,
|
||||
[this, &clear_page_table](u64 page) { clear_page_table(page, gpu_page_table); });
|
||||
ForEachGPUPage(image.gpu_addr, image.guest_size_bytes, [this, &clear_page_table](u64 page) {
|
||||
clear_page_table(page, state->gpu_page_table);
|
||||
});
|
||||
if (False(image.flags & ImageFlagBits::Sparse)) {
|
||||
const auto map_id = image.map_view_id;
|
||||
ForEachCPUPage(image.cpu_addr, image.guest_size_bytes, [this, map_id](u64 page) {
|
||||
|
@ -1631,7 +1637,7 @@ void TextureCache<P>::DeleteImage(ImageId image_id, bool immediate_delete) {
|
|||
ASSERT_MSG(False(image.flags & ImageFlagBits::Registered), "Image was not unregistered");
|
||||
|
||||
// Mark render targets as dirty
|
||||
auto& dirty = maxwell3d.dirty.flags;
|
||||
auto& dirty = maxwell3d->dirty.flags;
|
||||
dirty[Dirty::RenderTargets] = true;
|
||||
dirty[Dirty::ZetaBuffer] = true;
|
||||
for (size_t rt = 0; rt < NUM_RT; ++rt) {
|
||||
|
@ -1681,22 +1687,24 @@ void TextureCache<P>::DeleteImage(ImageId image_id, bool immediate_delete) {
|
|||
if (alloc_images.empty()) {
|
||||
image_allocs_table.erase(alloc_it);
|
||||
}
|
||||
if constexpr (ENABLE_VALIDATION) {
|
||||
std::ranges::fill(graphics_image_view_ids, CORRUPT_ID);
|
||||
std::ranges::fill(compute_image_view_ids, CORRUPT_ID);
|
||||
for (auto& this_state : channel_storage) {
|
||||
if constexpr (ENABLE_VALIDATION) {
|
||||
std::ranges::fill(this_state.graphics_image_view_ids, CORRUPT_ID);
|
||||
std::ranges::fill(this_state.compute_image_view_ids, CORRUPT_ID);
|
||||
}
|
||||
this_state.graphics_image_table.Invalidate();
|
||||
this_state.compute_image_table.Invalidate();
|
||||
}
|
||||
graphics_image_table.Invalidate();
|
||||
compute_image_table.Invalidate();
|
||||
has_deleted_images = true;
|
||||
}
|
||||
|
||||
template <class P>
|
||||
void TextureCache<P>::RemoveImageViewReferences(std::span<const ImageViewId> removed_views) {
|
||||
auto it = image_views.begin();
|
||||
while (it != image_views.end()) {
|
||||
auto it = state->image_views.begin();
|
||||
while (it != state->image_views.end()) {
|
||||
const auto found = std::ranges::find(removed_views, it->second);
|
||||
if (found != removed_views.end()) {
|
||||
it = image_views.erase(it);
|
||||
it = state->image_views.erase(it);
|
||||
} else {
|
||||
++it;
|
||||
}
|
||||
|
@ -1943,7 +1951,7 @@ bool TextureCache<P>::IsFullClear(ImageViewId id) {
|
|||
const ImageViewBase& image_view = slot_image_views[id];
|
||||
const ImageBase& image = slot_images[image_view.image_id];
|
||||
const Extent3D size = image_view.size;
|
||||
const auto& regs = maxwell3d.regs;
|
||||
const auto& regs = maxwell3d->regs;
|
||||
const auto& scissor = regs.scissor_test[0];
|
||||
if (image.info.resources.levels > 1 || image.info.resources.layers > 1) {
|
||||
// Images with multiple resources can't be cleared in a single call
|
||||
|
@ -1958,4 +1966,61 @@ bool TextureCache<P>::IsFullClear(ImageViewId id) {
|
|||
scissor.max_y >= size.height;
|
||||
}
|
||||
|
||||
template <class P>
|
||||
TextureCache<P>::ChannelInfo::ChannelInfo(Tegra::Control::ChannelState& state) noexcept
|
||||
: maxwell3d{*state.maxwell_3d}, kepler_compute{*state.kepler_compute},
|
||||
gpu_memory{*state.memory_manager}, graphics_image_table{gpu_memory},
|
||||
graphics_sampler_table{gpu_memory}, compute_image_table{gpu_memory}, compute_sampler_table{
|
||||
gpu_memory} {}
|
||||
|
||||
template <class P>
|
||||
void TextureCache<P>::CreateChannel(struct Tegra::Control::ChannelState& channel) {
|
||||
ASSERT(channel_map.find(channel.bind_id) == channel_map.end() && channel.bind_id >= 0);
|
||||
auto new_id = [this, &channel]() {
|
||||
if (!free_channel_ids.empty()) {
|
||||
auto id = free_channel_ids.front();
|
||||
free_channel_ids.pop_front();
|
||||
new (&channel_storage[id]) ChannelInfo(channel);
|
||||
return id;
|
||||
}
|
||||
channel_storage.emplace_back(channel);
|
||||
return channel_storage.size() - 1;
|
||||
}();
|
||||
channel_map.emplace(channel.bind_id, new_id);
|
||||
if (current_channel_id != UNSET_CHANNEL) {
|
||||
state = &channel_storage[current_channel_id];
|
||||
}
|
||||
}
|
||||
|
||||
/// Bind a channel for execution.
|
||||
template <class P>
|
||||
void TextureCache<P>::BindToChannel(s32 id) {
|
||||
auto it = channel_map.find(id);
|
||||
ASSERT(it != channel_map.end() && id >= 0);
|
||||
current_channel_id = it->second;
|
||||
state = &channel_storage[current_channel_id];
|
||||
maxwell3d = &state->maxwell3d;
|
||||
kepler_compute = &state->kepler_compute;
|
||||
gpu_memory = &state->gpu_memory;
|
||||
}
|
||||
|
||||
/// Erase channel's state.
|
||||
template <class P>
|
||||
void TextureCache<P>::EraseChannel(s32 id) {
|
||||
const auto it = channel_map.find(id);
|
||||
ASSERT(it != channel_map.end() && id >= 0);
|
||||
const auto this_id = it->second;
|
||||
free_channel_ids.push_back(this_id);
|
||||
channel_map.erase(it);
|
||||
if (this_id == current_channel_id) {
|
||||
current_channel_id = UNSET_CHANNEL;
|
||||
state = nullptr;
|
||||
maxwell3d = nullptr;
|
||||
kepler_compute = nullptr;
|
||||
gpu_memory = nullptr;
|
||||
} else if (current_channel_id != UNSET_CHANNEL) {
|
||||
state = &channel_storage[current_channel_id];
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace VideoCommon
|
||||
|
|
|
@ -3,6 +3,8 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include <deque>
|
||||
#include <limits>
|
||||
#include <mutex>
|
||||
#include <span>
|
||||
#include <type_traits>
|
||||
|
@ -26,6 +28,10 @@
|
|||
#include "video_core/texture_cache/types.h"
|
||||
#include "video_core/textures/texture.h"
|
||||
|
||||
namespace Tegra::Control {
|
||||
struct ChannelState;
|
||||
}
|
||||
|
||||
namespace VideoCommon {
|
||||
|
||||
using Tegra::Texture::SwizzleSource;
|
||||
|
@ -58,6 +64,8 @@ class TextureCache {
|
|||
/// True when the API can provide info about the memory of the device.
|
||||
static constexpr bool HAS_DEVICE_MEMORY_INFO = P::HAS_DEVICE_MEMORY_INFO;
|
||||
|
||||
static constexpr size_t UNSET_CHANNEL{std::numeric_limits<size_t>::max()};
|
||||
|
||||
static constexpr s64 TARGET_THRESHOLD = 4_GiB;
|
||||
static constexpr s64 DEFAULT_EXPECTED_MEMORY = 1_GiB + 125_MiB;
|
||||
static constexpr s64 DEFAULT_CRITICAL_MEMORY = 1_GiB + 625_MiB;
|
||||
|
@ -85,8 +93,7 @@ class TextureCache {
|
|||
};
|
||||
|
||||
public:
|
||||
explicit TextureCache(Runtime&, VideoCore::RasterizerInterface&, Tegra::Engines::Maxwell3D&,
|
||||
Tegra::Engines::KeplerCompute&, Tegra::MemoryManager&);
|
||||
explicit TextureCache(Runtime&, VideoCore::RasterizerInterface&);
|
||||
|
||||
/// Notify the cache that a new frame has been queued
|
||||
void TickFrame();
|
||||
|
@ -171,6 +178,15 @@ public:
|
|||
|
||||
[[nodiscard]] bool IsRescaling(const ImageViewBase& image_view) const noexcept;
|
||||
|
||||
/// Create channel state.
|
||||
void CreateChannel(struct Tegra::Control::ChannelState& channel);
|
||||
|
||||
/// Bind a channel for execution.
|
||||
void BindToChannel(s32 id);
|
||||
|
||||
/// Erase channel's state.
|
||||
void EraseChannel(s32 id);
|
||||
|
||||
std::mutex mutex;
|
||||
|
||||
private:
|
||||
|
@ -338,31 +354,52 @@ private:
|
|||
u64 GetScaledImageSizeBytes(ImageBase& image);
|
||||
|
||||
Runtime& runtime;
|
||||
|
||||
struct ChannelInfo {
|
||||
ChannelInfo() = delete;
|
||||
ChannelInfo(struct Tegra::Control::ChannelState& state) noexcept;
|
||||
ChannelInfo(const ChannelInfo& state) = delete;
|
||||
ChannelInfo& operator=(const ChannelInfo&) = delete;
|
||||
ChannelInfo(ChannelInfo&& other) noexcept = default;
|
||||
ChannelInfo& operator=(ChannelInfo&& other) noexcept = default;
|
||||
|
||||
Tegra::Engines::Maxwell3D& maxwell3d;
|
||||
Tegra::Engines::KeplerCompute& kepler_compute;
|
||||
Tegra::MemoryManager& gpu_memory;
|
||||
|
||||
DescriptorTable<TICEntry> graphics_image_table{gpu_memory};
|
||||
DescriptorTable<TSCEntry> graphics_sampler_table{gpu_memory};
|
||||
std::vector<SamplerId> graphics_sampler_ids;
|
||||
std::vector<ImageViewId> graphics_image_view_ids;
|
||||
|
||||
DescriptorTable<TICEntry> compute_image_table{gpu_memory};
|
||||
DescriptorTable<TSCEntry> compute_sampler_table{gpu_memory};
|
||||
std::vector<SamplerId> compute_sampler_ids;
|
||||
std::vector<ImageViewId> compute_image_view_ids;
|
||||
|
||||
std::unordered_map<TICEntry, ImageViewId> image_views;
|
||||
std::unordered_map<TSCEntry, SamplerId> samplers;
|
||||
|
||||
std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> gpu_page_table;
|
||||
};
|
||||
|
||||
std::deque<ChannelInfo> channel_storage;
|
||||
std::deque<size_t> free_channel_ids;
|
||||
std::unordered_map<s32, size_t> channel_map;
|
||||
|
||||
ChannelInfo* state;
|
||||
size_t current_channel_id{UNSET_CHANNEL};
|
||||
VideoCore::RasterizerInterface& rasterizer;
|
||||
Tegra::Engines::Maxwell3D& maxwell3d;
|
||||
Tegra::Engines::KeplerCompute& kepler_compute;
|
||||
Tegra::MemoryManager& gpu_memory;
|
||||
|
||||
DescriptorTable<TICEntry> graphics_image_table{gpu_memory};
|
||||
DescriptorTable<TSCEntry> graphics_sampler_table{gpu_memory};
|
||||
std::vector<SamplerId> graphics_sampler_ids;
|
||||
std::vector<ImageViewId> graphics_image_view_ids;
|
||||
|
||||
DescriptorTable<TICEntry> compute_image_table{gpu_memory};
|
||||
DescriptorTable<TSCEntry> compute_sampler_table{gpu_memory};
|
||||
std::vector<SamplerId> compute_sampler_ids;
|
||||
std::vector<ImageViewId> compute_image_view_ids;
|
||||
Tegra::Engines::Maxwell3D* maxwell3d;
|
||||
Tegra::Engines::KeplerCompute* kepler_compute;
|
||||
Tegra::MemoryManager* gpu_memory;
|
||||
|
||||
RenderTargets render_targets;
|
||||
|
||||
std::unordered_map<TICEntry, ImageViewId> image_views;
|
||||
std::unordered_map<TSCEntry, SamplerId> samplers;
|
||||
std::unordered_map<RenderTargets, FramebufferId> framebuffers;
|
||||
|
||||
std::unordered_map<u64, std::vector<ImageMapId>, IdentityHash<u64>> page_table;
|
||||
std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> gpu_page_table;
|
||||
std::unordered_map<u64, std::vector<ImageId>, IdentityHash<u64>> sparse_page_table;
|
||||
|
||||
std::unordered_map<ImageId, std::vector<ImageViewId>> sparse_views;
|
||||
|
||||
VAddr virtual_invalid_space{};
|
||||
|
|
Loading…
Reference in a new issue