shader: Move pipeline cache logic to separate files
Move code to separate files to be able to reuse it from OpenGL. This greatly simplifies the pipeline cache logic on Vulkan. Transform feedback state is not yet abstracted and it's still intrusively stored inside vk_pipeline_cache. It will be moved when needed on OpenGL.
This commit is contained in:
parent
ac8835659e
commit
025b20f96a
|
@ -145,7 +145,10 @@ add_library(video_core STATIC
|
|||
renderer_vulkan/vk_texture_cache.h
|
||||
renderer_vulkan/vk_update_descriptor.cpp
|
||||
renderer_vulkan/vk_update_descriptor.h
|
||||
shader_cache.cpp
|
||||
shader_cache.h
|
||||
shader_environment.cpp
|
||||
shader_environment.h
|
||||
shader_notify.cpp
|
||||
shader_notify.h
|
||||
surface.cpp
|
||||
|
|
|
@ -217,7 +217,7 @@ private:
|
|||
TextureCache texture_cache;
|
||||
BufferCacheRuntime buffer_cache_runtime;
|
||||
BufferCache buffer_cache;
|
||||
ShaderCacheOpenGL shader_cache;
|
||||
ShaderCache shader_cache;
|
||||
QueryCache query_cache;
|
||||
AccelerateDMA accelerate_dma;
|
||||
FenceManagerOpenGL fence_manager;
|
||||
|
|
|
@ -29,18 +29,13 @@
|
|||
|
||||
namespace OpenGL {
|
||||
|
||||
Shader::Shader() = default;
|
||||
ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_,
|
||||
Tegra::GPU& gpu_, Tegra::Engines::Maxwell3D& maxwell3d_,
|
||||
Tegra::Engines::KeplerCompute& kepler_compute_,
|
||||
Tegra::MemoryManager& gpu_memory_, const Device& device_)
|
||||
: VideoCommon::ShaderCache{rasterizer_, gpu_memory_, maxwell3d_, kepler_compute_},
|
||||
emu_window{emu_window_}, gpu{gpu_}, device{device_} {}
|
||||
|
||||
Shader::~Shader() = default;
|
||||
|
||||
ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer_,
|
||||
Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_,
|
||||
Tegra::Engines::Maxwell3D& maxwell3d_,
|
||||
Tegra::Engines::KeplerCompute& kepler_compute_,
|
||||
Tegra::MemoryManager& gpu_memory_, const Device& device_)
|
||||
: ShaderCache{rasterizer_}, emu_window{emu_window_}, gpu{gpu_}, gpu_memory{gpu_memory_},
|
||||
maxwell3d{maxwell3d_}, kepler_compute{kepler_compute_}, device{device_} {}
|
||||
|
||||
ShaderCacheOpenGL::~ShaderCacheOpenGL() = default;
|
||||
ShaderCache::~ShaderCache() = default;
|
||||
|
||||
} // namespace OpenGL
|
||||
|
|
|
@ -36,27 +36,59 @@ class RasterizerOpenGL;
|
|||
|
||||
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
|
||||
|
||||
class Shader {
|
||||
struct GraphicsProgramKey {
|
||||
struct TransformFeedbackState {
|
||||
struct Layout {
|
||||
u32 stream;
|
||||
u32 varying_count;
|
||||
u32 stride;
|
||||
};
|
||||
std::array<Layout, Maxwell::NumTransformFeedbackBuffers> layouts;
|
||||
std::array<std::array<u8, 128>, Maxwell::NumTransformFeedbackBuffers> varyings;
|
||||
};
|
||||
|
||||
std::array<u64, 6> unique_hashes;
|
||||
std::array<u8, Maxwell::NumRenderTargets> color_formats;
|
||||
union {
|
||||
u32 raw;
|
||||
BitField<0, 1, u32> xfb_enabled;
|
||||
BitField<1, 1, u32> early_z;
|
||||
BitField<2, 4, Maxwell::PrimitiveTopology> gs_input_topology;
|
||||
BitField<6, 2, u32> tessellation_primitive;
|
||||
BitField<8, 2, u32> tessellation_spacing;
|
||||
BitField<10, 1, u32> tessellation_clockwise;
|
||||
};
|
||||
u32 padding;
|
||||
TransformFeedbackState xfb_state;
|
||||
|
||||
[[nodiscard]] size_t Size() const noexcept {
|
||||
if (xfb_enabled != 0) {
|
||||
return sizeof(GraphicsProgramKey);
|
||||
} else {
|
||||
return offsetof(GraphicsProgramKey, padding);
|
||||
}
|
||||
}
|
||||
};
|
||||
static_assert(std::has_unique_object_representations_v<GraphicsProgramKey>);
|
||||
static_assert(std::is_trivially_copyable_v<GraphicsProgramKey>);
|
||||
static_assert(std::is_trivially_constructible_v<GraphicsProgramKey>);
|
||||
|
||||
class GraphicsProgram {
|
||||
public:
|
||||
explicit Shader();
|
||||
~Shader();
|
||||
private:
|
||||
};
|
||||
|
||||
class ShaderCacheOpenGL final : public VideoCommon::ShaderCache<Shader> {
|
||||
class ShaderCache : public VideoCommon::ShaderCache {
|
||||
public:
|
||||
explicit ShaderCacheOpenGL(RasterizerOpenGL& rasterizer_,
|
||||
Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu,
|
||||
Tegra::Engines::Maxwell3D& maxwell3d_,
|
||||
Tegra::Engines::KeplerCompute& kepler_compute_,
|
||||
Tegra::MemoryManager& gpu_memory_, const Device& device_);
|
||||
~ShaderCacheOpenGL() override;
|
||||
explicit ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_,
|
||||
Tegra::GPU& gpu_, Tegra::Engines::Maxwell3D& maxwell3d_,
|
||||
Tegra::Engines::KeplerCompute& kepler_compute_,
|
||||
Tegra::MemoryManager& gpu_memory_, const Device& device_);
|
||||
~ShaderCache();
|
||||
|
||||
private:
|
||||
Core::Frontend::EmuWindow& emu_window;
|
||||
Tegra::GPU& gpu;
|
||||
Tegra::MemoryManager& gpu_memory;
|
||||
Tegra::Engines::Maxwell3D& maxwell3d;
|
||||
Tegra::Engines::KeplerCompute& kepler_compute;
|
||||
const Device& device;
|
||||
};
|
||||
|
||||
|
|
|
@ -23,7 +23,7 @@
|
|||
namespace Vulkan {
|
||||
|
||||
struct GraphicsPipelineCacheKey {
|
||||
std::array<u128, 6> unique_hashes;
|
||||
std::array<u64, 6> unique_hashes;
|
||||
FixedPipelineState state;
|
||||
|
||||
size_t Hash() const noexcept;
|
||||
|
|
|
@ -11,7 +11,8 @@
|
|||
|
||||
#include "common/bit_cast.h"
|
||||
#include "common/cityhash.h"
|
||||
#include "common/file_util.h"
|
||||
#include "common/fs/fs.h"
|
||||
#include "common/fs/path_util.h"
|
||||
#include "common/microprofile.h"
|
||||
#include "common/thread_worker.h"
|
||||
#include "core/core.h"
|
||||
|
@ -36,6 +37,7 @@
|
|||
#include "video_core/renderer_vulkan/vk_shader_util.h"
|
||||
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
|
||||
#include "video_core/shader_cache.h"
|
||||
#include "video_core/shader_environment.h"
|
||||
#include "video_core/shader_notify.h"
|
||||
#include "video_core/vulkan_common/vulkan_device.h"
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
|
@ -43,449 +45,19 @@
|
|||
namespace Vulkan {
|
||||
MICROPROFILE_DECLARE(Vulkan_PipelineCache);
|
||||
|
||||
namespace {
|
||||
using Shader::Backend::SPIRV::EmitSPIRV;
|
||||
using Shader::Maxwell::TranslateProgram;
|
||||
using VideoCommon::ComputeEnvironment;
|
||||
using VideoCommon::FileEnvironment;
|
||||
using VideoCommon::GenericEnvironment;
|
||||
using VideoCommon::GraphicsEnvironment;
|
||||
|
||||
template <typename Container>
|
||||
auto MakeSpan(Container& container) {
|
||||
return std::span(container.data(), container.size());
|
||||
}
|
||||
|
||||
static u64 MakeCbufKey(u32 index, u32 offset) {
|
||||
return (static_cast<u64>(index) << 32) | offset;
|
||||
}
|
||||
|
||||
class GenericEnvironment : public Shader::Environment {
|
||||
public:
|
||||
explicit GenericEnvironment() = default;
|
||||
explicit GenericEnvironment(Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_,
|
||||
u32 start_address_)
|
||||
: gpu_memory{&gpu_memory_}, program_base{program_base_} {
|
||||
start_address = start_address_;
|
||||
}
|
||||
|
||||
~GenericEnvironment() override = default;
|
||||
|
||||
u32 TextureBoundBuffer() const final {
|
||||
return texture_bound;
|
||||
}
|
||||
|
||||
u32 LocalMemorySize() const final {
|
||||
return local_memory_size;
|
||||
}
|
||||
|
||||
u32 SharedMemorySize() const final {
|
||||
return shared_memory_size;
|
||||
}
|
||||
|
||||
std::array<u32, 3> WorkgroupSize() const final {
|
||||
return workgroup_size;
|
||||
}
|
||||
|
||||
u64 ReadInstruction(u32 address) final {
|
||||
read_lowest = std::min(read_lowest, address);
|
||||
read_highest = std::max(read_highest, address);
|
||||
|
||||
if (address >= cached_lowest && address < cached_highest) {
|
||||
return code[(address - cached_lowest) / INST_SIZE];
|
||||
}
|
||||
has_unbound_instructions = true;
|
||||
return gpu_memory->Read<u64>(program_base + address);
|
||||
}
|
||||
|
||||
std::optional<u128> Analyze() {
|
||||
const std::optional<u64> size{TryFindSize()};
|
||||
if (!size) {
|
||||
return std::nullopt;
|
||||
}
|
||||
cached_lowest = start_address;
|
||||
cached_highest = start_address + static_cast<u32>(*size);
|
||||
return Common::CityHash128(reinterpret_cast<const char*>(code.data()), *size);
|
||||
}
|
||||
|
||||
void SetCachedSize(size_t size_bytes) {
|
||||
cached_lowest = start_address;
|
||||
cached_highest = start_address + static_cast<u32>(size_bytes);
|
||||
code.resize(CachedSize());
|
||||
gpu_memory->ReadBlock(program_base + cached_lowest, code.data(), code.size() * sizeof(u64));
|
||||
}
|
||||
|
||||
[[nodiscard]] size_t CachedSize() const noexcept {
|
||||
return cached_highest - cached_lowest + INST_SIZE;
|
||||
}
|
||||
|
||||
[[nodiscard]] size_t ReadSize() const noexcept {
|
||||
return read_highest - read_lowest + INST_SIZE;
|
||||
}
|
||||
|
||||
[[nodiscard]] bool CanBeSerialized() const noexcept {
|
||||
return !has_unbound_instructions;
|
||||
}
|
||||
|
||||
[[nodiscard]] u128 CalculateHash() const {
|
||||
const size_t size{ReadSize()};
|
||||
const auto data{std::make_unique<char[]>(size)};
|
||||
gpu_memory->ReadBlock(program_base + read_lowest, data.get(), size);
|
||||
return Common::CityHash128(data.get(), size);
|
||||
}
|
||||
|
||||
void Serialize(std::ofstream& file) const {
|
||||
const u64 code_size{static_cast<u64>(CachedSize())};
|
||||
const u64 num_texture_types{static_cast<u64>(texture_types.size())};
|
||||
const u64 num_cbuf_values{static_cast<u64>(cbuf_values.size())};
|
||||
|
||||
file.write(reinterpret_cast<const char*>(&code_size), sizeof(code_size))
|
||||
.write(reinterpret_cast<const char*>(&num_texture_types), sizeof(num_texture_types))
|
||||
.write(reinterpret_cast<const char*>(&num_cbuf_values), sizeof(num_cbuf_values))
|
||||
.write(reinterpret_cast<const char*>(&local_memory_size), sizeof(local_memory_size))
|
||||
.write(reinterpret_cast<const char*>(&texture_bound), sizeof(texture_bound))
|
||||
.write(reinterpret_cast<const char*>(&start_address), sizeof(start_address))
|
||||
.write(reinterpret_cast<const char*>(&cached_lowest), sizeof(cached_lowest))
|
||||
.write(reinterpret_cast<const char*>(&cached_highest), sizeof(cached_highest))
|
||||
.write(reinterpret_cast<const char*>(&stage), sizeof(stage))
|
||||
.write(reinterpret_cast<const char*>(code.data()), code_size);
|
||||
for (const auto [key, type] : texture_types) {
|
||||
file.write(reinterpret_cast<const char*>(&key), sizeof(key))
|
||||
.write(reinterpret_cast<const char*>(&type), sizeof(type));
|
||||
}
|
||||
for (const auto [key, type] : cbuf_values) {
|
||||
file.write(reinterpret_cast<const char*>(&key), sizeof(key))
|
||||
.write(reinterpret_cast<const char*>(&type), sizeof(type));
|
||||
}
|
||||
if (stage == Shader::Stage::Compute) {
|
||||
file.write(reinterpret_cast<const char*>(&workgroup_size), sizeof(workgroup_size))
|
||||
.write(reinterpret_cast<const char*>(&shared_memory_size),
|
||||
sizeof(shared_memory_size));
|
||||
} else {
|
||||
file.write(reinterpret_cast<const char*>(&sph), sizeof(sph));
|
||||
}
|
||||
}
|
||||
|
||||
protected:
|
||||
static constexpr size_t INST_SIZE = sizeof(u64);
|
||||
|
||||
std::optional<u64> TryFindSize() {
|
||||
constexpr size_t BLOCK_SIZE = 0x1000;
|
||||
constexpr size_t MAXIMUM_SIZE = 0x100000;
|
||||
|
||||
constexpr u64 SELF_BRANCH_A = 0xE2400FFFFF87000FULL;
|
||||
constexpr u64 SELF_BRANCH_B = 0xE2400FFFFF07000FULL;
|
||||
|
||||
GPUVAddr guest_addr{program_base + start_address};
|
||||
size_t offset{0};
|
||||
size_t size{BLOCK_SIZE};
|
||||
while (size <= MAXIMUM_SIZE) {
|
||||
code.resize(size / INST_SIZE);
|
||||
u64* const data = code.data() + offset / INST_SIZE;
|
||||
gpu_memory->ReadBlock(guest_addr, data, BLOCK_SIZE);
|
||||
for (size_t index = 0; index < BLOCK_SIZE; index += INST_SIZE) {
|
||||
const u64 inst = data[index / INST_SIZE];
|
||||
if (inst == SELF_BRANCH_A || inst == SELF_BRANCH_B) {
|
||||
return offset + index;
|
||||
}
|
||||
}
|
||||
guest_addr += BLOCK_SIZE;
|
||||
size += BLOCK_SIZE;
|
||||
offset += BLOCK_SIZE;
|
||||
}
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
Shader::TextureType ReadTextureTypeImpl(GPUVAddr tic_addr, u32 tic_limit, bool via_header_index,
|
||||
u32 raw) {
|
||||
const TextureHandle handle{raw, via_header_index};
|
||||
const GPUVAddr descriptor_addr{tic_addr + handle.image * sizeof(Tegra::Texture::TICEntry)};
|
||||
Tegra::Texture::TICEntry entry;
|
||||
gpu_memory->ReadBlock(descriptor_addr, &entry, sizeof(entry));
|
||||
|
||||
const Shader::TextureType result{[&] {
|
||||
switch (entry.texture_type) {
|
||||
case Tegra::Texture::TextureType::Texture1D:
|
||||
return Shader::TextureType::Color1D;
|
||||
case Tegra::Texture::TextureType::Texture2D:
|
||||
case Tegra::Texture::TextureType::Texture2DNoMipmap:
|
||||
return Shader::TextureType::Color2D;
|
||||
case Tegra::Texture::TextureType::Texture3D:
|
||||
return Shader::TextureType::Color3D;
|
||||
case Tegra::Texture::TextureType::TextureCubemap:
|
||||
return Shader::TextureType::ColorCube;
|
||||
case Tegra::Texture::TextureType::Texture1DArray:
|
||||
return Shader::TextureType::ColorArray1D;
|
||||
case Tegra::Texture::TextureType::Texture2DArray:
|
||||
return Shader::TextureType::ColorArray2D;
|
||||
case Tegra::Texture::TextureType::Texture1DBuffer:
|
||||
return Shader::TextureType::Buffer;
|
||||
case Tegra::Texture::TextureType::TextureCubeArray:
|
||||
return Shader::TextureType::ColorArrayCube;
|
||||
default:
|
||||
throw Shader::NotImplementedException("Unknown texture type");
|
||||
}
|
||||
}()};
|
||||
texture_types.emplace(raw, result);
|
||||
return result;
|
||||
}
|
||||
|
||||
Tegra::MemoryManager* gpu_memory{};
|
||||
GPUVAddr program_base{};
|
||||
|
||||
std::vector<u64> code;
|
||||
std::unordered_map<u32, Shader::TextureType> texture_types;
|
||||
std::unordered_map<u64, u32> cbuf_values;
|
||||
|
||||
u32 local_memory_size{};
|
||||
u32 texture_bound{};
|
||||
u32 shared_memory_size{};
|
||||
std::array<u32, 3> workgroup_size{};
|
||||
|
||||
u32 read_lowest = std::numeric_limits<u32>::max();
|
||||
u32 read_highest = 0;
|
||||
|
||||
u32 cached_lowest = std::numeric_limits<u32>::max();
|
||||
u32 cached_highest = 0;
|
||||
|
||||
bool has_unbound_instructions = false;
|
||||
};
|
||||
|
||||
namespace {
|
||||
using Shader::Backend::SPIRV::EmitSPIRV;
|
||||
using Shader::Maxwell::TranslateProgram;
|
||||
|
||||
// TODO: Move this to a separate file
|
||||
constexpr std::array<char, 8> MAGIC_NUMBER{'y', 'u', 'z', 'u', 'c', 'a', 'c', 'h'};
|
||||
constexpr u32 CACHE_VERSION{2};
|
||||
|
||||
class GraphicsEnvironment final : public GenericEnvironment {
|
||||
public:
|
||||
explicit GraphicsEnvironment() = default;
|
||||
explicit GraphicsEnvironment(Tegra::Engines::Maxwell3D& maxwell3d_,
|
||||
Tegra::MemoryManager& gpu_memory_, Maxwell::ShaderProgram program,
|
||||
GPUVAddr program_base_, u32 start_address_)
|
||||
: GenericEnvironment{gpu_memory_, program_base_, start_address_}, maxwell3d{&maxwell3d_} {
|
||||
gpu_memory->ReadBlock(program_base + start_address, &sph, sizeof(sph));
|
||||
switch (program) {
|
||||
case Maxwell::ShaderProgram::VertexA:
|
||||
stage = Shader::Stage::VertexA;
|
||||
stage_index = 0;
|
||||
break;
|
||||
case Maxwell::ShaderProgram::VertexB:
|
||||
stage = Shader::Stage::VertexB;
|
||||
stage_index = 0;
|
||||
break;
|
||||
case Maxwell::ShaderProgram::TesselationControl:
|
||||
stage = Shader::Stage::TessellationControl;
|
||||
stage_index = 1;
|
||||
break;
|
||||
case Maxwell::ShaderProgram::TesselationEval:
|
||||
stage = Shader::Stage::TessellationEval;
|
||||
stage_index = 2;
|
||||
break;
|
||||
case Maxwell::ShaderProgram::Geometry:
|
||||
stage = Shader::Stage::Geometry;
|
||||
stage_index = 3;
|
||||
break;
|
||||
case Maxwell::ShaderProgram::Fragment:
|
||||
stage = Shader::Stage::Fragment;
|
||||
stage_index = 4;
|
||||
break;
|
||||
default:
|
||||
UNREACHABLE_MSG("Invalid program={}", program);
|
||||
break;
|
||||
}
|
||||
const u64 local_size{sph.LocalMemorySize()};
|
||||
ASSERT(local_size <= std::numeric_limits<u32>::max());
|
||||
local_memory_size = static_cast<u32>(local_size);
|
||||
texture_bound = maxwell3d->regs.tex_cb_index;
|
||||
}
|
||||
|
||||
~GraphicsEnvironment() override = default;
|
||||
|
||||
u32 ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) override {
|
||||
const auto& cbuf{maxwell3d->state.shader_stages[stage_index].const_buffers[cbuf_index]};
|
||||
ASSERT(cbuf.enabled);
|
||||
u32 value{};
|
||||
if (cbuf_offset < cbuf.size) {
|
||||
value = gpu_memory->Read<u32>(cbuf.address + cbuf_offset);
|
||||
}
|
||||
cbuf_values.emplace(MakeCbufKey(cbuf_index, cbuf_offset), value);
|
||||
return value;
|
||||
}
|
||||
|
||||
Shader::TextureType ReadTextureType(u32 handle) override {
|
||||
const auto& regs{maxwell3d->regs};
|
||||
const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex};
|
||||
return ReadTextureTypeImpl(regs.tic.Address(), regs.tic.limit, via_header_index, handle);
|
||||
}
|
||||
|
||||
private:
|
||||
Tegra::Engines::Maxwell3D* maxwell3d{};
|
||||
size_t stage_index{};
|
||||
};
|
||||
|
||||
class ComputeEnvironment final : public GenericEnvironment {
|
||||
public:
|
||||
explicit ComputeEnvironment() = default;
|
||||
explicit ComputeEnvironment(Tegra::Engines::KeplerCompute& kepler_compute_,
|
||||
Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_,
|
||||
u32 start_address_)
|
||||
: GenericEnvironment{gpu_memory_, program_base_, start_address_}, kepler_compute{
|
||||
&kepler_compute_} {
|
||||
const auto& qmd{kepler_compute->launch_description};
|
||||
stage = Shader::Stage::Compute;
|
||||
local_memory_size = qmd.local_pos_alloc;
|
||||
texture_bound = kepler_compute->regs.tex_cb_index;
|
||||
shared_memory_size = qmd.shared_alloc;
|
||||
workgroup_size = {qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z};
|
||||
}
|
||||
|
||||
~ComputeEnvironment() override = default;
|
||||
|
||||
u32 ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) override {
|
||||
const auto& qmd{kepler_compute->launch_description};
|
||||
ASSERT(((qmd.const_buffer_enable_mask.Value() >> cbuf_index) & 1) != 0);
|
||||
const auto& cbuf{qmd.const_buffer_config[cbuf_index]};
|
||||
u32 value{};
|
||||
if (cbuf_offset < cbuf.size) {
|
||||
value = gpu_memory->Read<u32>(cbuf.Address() + cbuf_offset);
|
||||
}
|
||||
cbuf_values.emplace(MakeCbufKey(cbuf_index, cbuf_offset), value);
|
||||
return value;
|
||||
}
|
||||
|
||||
Shader::TextureType ReadTextureType(u32 handle) override {
|
||||
const auto& regs{kepler_compute->regs};
|
||||
const auto& qmd{kepler_compute->launch_description};
|
||||
return ReadTextureTypeImpl(regs.tic.Address(), regs.tic.limit, qmd.linked_tsc != 0, handle);
|
||||
}
|
||||
|
||||
private:
|
||||
Tegra::Engines::KeplerCompute* kepler_compute{};
|
||||
};
|
||||
|
||||
void SerializePipeline(std::span<const char> key, std::span<const GenericEnvironment* const> envs,
|
||||
std::ofstream& file) {
|
||||
if (!std::ranges::all_of(envs, &GenericEnvironment::CanBeSerialized)) {
|
||||
return;
|
||||
}
|
||||
const u32 num_envs{static_cast<u32>(envs.size())};
|
||||
file.write(reinterpret_cast<const char*>(&num_envs), sizeof(num_envs));
|
||||
for (const GenericEnvironment* const env : envs) {
|
||||
env->Serialize(file);
|
||||
}
|
||||
file.write(key.data(), key.size_bytes());
|
||||
}
|
||||
|
||||
template <typename Key, typename Envs>
|
||||
void SerializePipeline(const Key& key, const Envs& envs, const std::string& filename) {
|
||||
try {
|
||||
std::ofstream file;
|
||||
file.exceptions(std::ifstream::failbit);
|
||||
Common::FS::OpenFStream(file, filename, std::ios::binary | std::ios::ate | std::ios::app);
|
||||
if (!file.is_open()) {
|
||||
LOG_ERROR(Common_Filesystem, "Failed to open pipeline cache file {}", filename);
|
||||
return;
|
||||
}
|
||||
if (file.tellp() == 0) {
|
||||
file.write(MAGIC_NUMBER.data(), MAGIC_NUMBER.size())
|
||||
.write(reinterpret_cast<const char*>(&CACHE_VERSION), sizeof(CACHE_VERSION));
|
||||
}
|
||||
const std::span key_span(reinterpret_cast<const char*>(&key), sizeof(key));
|
||||
SerializePipeline(key_span, MakeSpan(envs), file);
|
||||
|
||||
} catch (const std::ios_base::failure& e) {
|
||||
LOG_ERROR(Common_Filesystem, "{}", e.what());
|
||||
if (!Common::FS::Delete(filename)) {
|
||||
LOG_ERROR(Common_Filesystem, "Failed to delete pipeline cache file {}", filename);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
class FileEnvironment final : public Shader::Environment {
|
||||
public:
|
||||
void Deserialize(std::ifstream& file) {
|
||||
u64 code_size{};
|
||||
u64 num_texture_types{};
|
||||
u64 num_cbuf_values{};
|
||||
file.read(reinterpret_cast<char*>(&code_size), sizeof(code_size))
|
||||
.read(reinterpret_cast<char*>(&num_texture_types), sizeof(num_texture_types))
|
||||
.read(reinterpret_cast<char*>(&num_cbuf_values), sizeof(num_cbuf_values))
|
||||
.read(reinterpret_cast<char*>(&local_memory_size), sizeof(local_memory_size))
|
||||
.read(reinterpret_cast<char*>(&texture_bound), sizeof(texture_bound))
|
||||
.read(reinterpret_cast<char*>(&start_address), sizeof(start_address))
|
||||
.read(reinterpret_cast<char*>(&read_lowest), sizeof(read_lowest))
|
||||
.read(reinterpret_cast<char*>(&read_highest), sizeof(read_highest))
|
||||
.read(reinterpret_cast<char*>(&stage), sizeof(stage));
|
||||
code = std::make_unique<u64[]>(Common::DivCeil(code_size, sizeof(u64)));
|
||||
file.read(reinterpret_cast<char*>(code.get()), code_size);
|
||||
for (size_t i = 0; i < num_texture_types; ++i) {
|
||||
u32 key;
|
||||
Shader::TextureType type;
|
||||
file.read(reinterpret_cast<char*>(&key), sizeof(key))
|
||||
.read(reinterpret_cast<char*>(&type), sizeof(type));
|
||||
texture_types.emplace(key, type);
|
||||
}
|
||||
for (size_t i = 0; i < num_cbuf_values; ++i) {
|
||||
u64 key;
|
||||
u32 value;
|
||||
file.read(reinterpret_cast<char*>(&key), sizeof(key))
|
||||
.read(reinterpret_cast<char*>(&value), sizeof(value));
|
||||
cbuf_values.emplace(key, value);
|
||||
}
|
||||
if (stage == Shader::Stage::Compute) {
|
||||
file.read(reinterpret_cast<char*>(&workgroup_size), sizeof(workgroup_size))
|
||||
.read(reinterpret_cast<char*>(&shared_memory_size), sizeof(shared_memory_size));
|
||||
} else {
|
||||
file.read(reinterpret_cast<char*>(&sph), sizeof(sph));
|
||||
}
|
||||
}
|
||||
|
||||
u64 ReadInstruction(u32 address) override {
|
||||
if (address < read_lowest || address > read_highest) {
|
||||
throw Shader::LogicError("Out of bounds address {}", address);
|
||||
}
|
||||
return code[(address - read_lowest) / sizeof(u64)];
|
||||
}
|
||||
|
||||
u32 ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) override {
|
||||
const auto it{cbuf_values.find(MakeCbufKey(cbuf_index, cbuf_offset))};
|
||||
if (it == cbuf_values.end()) {
|
||||
throw Shader::LogicError("Uncached read texture type");
|
||||
}
|
||||
return it->second;
|
||||
}
|
||||
|
||||
Shader::TextureType ReadTextureType(u32 handle) override {
|
||||
const auto it{texture_types.find(handle)};
|
||||
if (it == texture_types.end()) {
|
||||
throw Shader::LogicError("Uncached read texture type");
|
||||
}
|
||||
return it->second;
|
||||
}
|
||||
|
||||
u32 LocalMemorySize() const override {
|
||||
return local_memory_size;
|
||||
}
|
||||
|
||||
u32 SharedMemorySize() const override {
|
||||
return shared_memory_size;
|
||||
}
|
||||
|
||||
u32 TextureBoundBuffer() const override {
|
||||
return texture_bound;
|
||||
}
|
||||
|
||||
std::array<u32, 3> WorkgroupSize() const override {
|
||||
return workgroup_size;
|
||||
}
|
||||
|
||||
private:
|
||||
std::unique_ptr<u64[]> code;
|
||||
std::unordered_map<u32, Shader::TextureType> texture_types;
|
||||
std::unordered_map<u64, u32> cbuf_values;
|
||||
std::array<u32, 3> workgroup_size{};
|
||||
u32 local_memory_size{};
|
||||
u32 shared_memory_size{};
|
||||
u32 texture_bound{};
|
||||
u32 read_lowest{};
|
||||
u32 read_highest{};
|
||||
};
|
||||
|
||||
Shader::CompareFunction MaxwellToCompareFunction(Maxwell::ComparisonOp comparison) {
|
||||
switch (comparison) {
|
||||
case Maxwell::ComparisonOp::Never:
|
||||
|
@ -518,113 +90,6 @@ Shader::CompareFunction MaxwellToCompareFunction(Maxwell::ComparisonOp compariso
|
|||
}
|
||||
} // Anonymous namespace
|
||||
|
||||
void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading,
|
||||
const VideoCore::DiskResourceLoadCallback& callback) {
|
||||
if (title_id == 0) {
|
||||
return;
|
||||
}
|
||||
std::string shader_dir{Common::FS::GetUserPath(Common::FS::UserPath::ShaderDir)};
|
||||
std::string base_dir{shader_dir + "/vulkan"};
|
||||
std::string transferable_dir{base_dir + "/transferable"};
|
||||
std::string precompiled_dir{base_dir + "/precompiled"};
|
||||
if (!Common::FS::CreateDir(shader_dir) || !Common::FS::CreateDir(base_dir) ||
|
||||
!Common::FS::CreateDir(transferable_dir) || !Common::FS::CreateDir(precompiled_dir)) {
|
||||
LOG_ERROR(Common_Filesystem, "Failed to create pipeline cache directories");
|
||||
return;
|
||||
}
|
||||
pipeline_cache_filename = fmt::format("{}/{:016x}.bin", transferable_dir, title_id);
|
||||
|
||||
struct {
|
||||
std::mutex mutex;
|
||||
size_t total{0};
|
||||
size_t built{0};
|
||||
bool has_loaded{false};
|
||||
} state;
|
||||
|
||||
std::ifstream file;
|
||||
Common::FS::OpenFStream(file, pipeline_cache_filename, std::ios::binary | std::ios::ate);
|
||||
if (!file.is_open()) {
|
||||
return;
|
||||
}
|
||||
file.exceptions(std::ifstream::failbit);
|
||||
const auto end{file.tellg()};
|
||||
file.seekg(0, std::ios::beg);
|
||||
|
||||
std::array<char, 8> magic_number;
|
||||
u32 cache_version;
|
||||
file.read(magic_number.data(), magic_number.size())
|
||||
.read(reinterpret_cast<char*>(&cache_version), sizeof(cache_version));
|
||||
if (magic_number != MAGIC_NUMBER || cache_version != CACHE_VERSION) {
|
||||
file.close();
|
||||
if (Common::FS::Delete(pipeline_cache_filename)) {
|
||||
if (magic_number != MAGIC_NUMBER) {
|
||||
LOG_ERROR(Render_Vulkan, "Invalid pipeline cache file");
|
||||
}
|
||||
if (cache_version != CACHE_VERSION) {
|
||||
LOG_INFO(Render_Vulkan, "Deleting old pipeline cache");
|
||||
}
|
||||
} else {
|
||||
LOG_ERROR(Render_Vulkan,
|
||||
"Invalid pipeline cache file and failed to delete it in \"{}\"",
|
||||
pipeline_cache_filename);
|
||||
}
|
||||
return;
|
||||
}
|
||||
while (file.tellg() != end) {
|
||||
if (stop_loading) {
|
||||
return;
|
||||
}
|
||||
u32 num_envs{};
|
||||
file.read(reinterpret_cast<char*>(&num_envs), sizeof(num_envs));
|
||||
std::vector<FileEnvironment> envs(num_envs);
|
||||
for (FileEnvironment& env : envs) {
|
||||
env.Deserialize(file);
|
||||
}
|
||||
if (envs.front().ShaderStage() == Shader::Stage::Compute) {
|
||||
ComputePipelineCacheKey key;
|
||||
file.read(reinterpret_cast<char*>(&key), sizeof(key));
|
||||
|
||||
workers.QueueWork([this, key, envs = std::move(envs), &state, &callback]() mutable {
|
||||
ShaderPools pools;
|
||||
auto pipeline{CreateComputePipeline(pools, key, envs.front(), false)};
|
||||
|
||||
std::lock_guard lock{state.mutex};
|
||||
compute_cache.emplace(key, std::move(pipeline));
|
||||
++state.built;
|
||||
if (state.has_loaded) {
|
||||
callback(VideoCore::LoadCallbackStage::Build, state.built, state.total);
|
||||
}
|
||||
});
|
||||
} else {
|
||||
GraphicsPipelineCacheKey key;
|
||||
file.read(reinterpret_cast<char*>(&key), sizeof(key));
|
||||
|
||||
workers.QueueWork([this, key, envs = std::move(envs), &state, &callback]() mutable {
|
||||
ShaderPools pools;
|
||||
boost::container::static_vector<Shader::Environment*, 5> env_ptrs;
|
||||
for (auto& env : envs) {
|
||||
env_ptrs.push_back(&env);
|
||||
}
|
||||
auto pipeline{CreateGraphicsPipeline(pools, key, MakeSpan(env_ptrs), false)};
|
||||
|
||||
std::lock_guard lock{state.mutex};
|
||||
graphics_cache.emplace(key, std::move(pipeline));
|
||||
++state.built;
|
||||
if (state.has_loaded) {
|
||||
callback(VideoCore::LoadCallbackStage::Build, state.built, state.total);
|
||||
}
|
||||
});
|
||||
}
|
||||
++state.total;
|
||||
}
|
||||
{
|
||||
std::lock_guard lock{state.mutex};
|
||||
callback(VideoCore::LoadCallbackStage::Build, 0, state.total);
|
||||
state.has_loaded = true;
|
||||
}
|
||||
workers.WaitForRequests();
|
||||
}
|
||||
|
||||
size_t ComputePipelineCacheKey::Hash() const noexcept {
|
||||
const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this);
|
||||
return static_cast<size_t>(hash);
|
||||
|
@ -643,17 +108,15 @@ bool GraphicsPipelineCacheKey::operator==(const GraphicsPipelineCacheKey& rhs) c
|
|||
return std::memcmp(&rhs, this, Size()) == 0;
|
||||
}
|
||||
|
||||
PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_,
|
||||
Tegra::Engines::Maxwell3D& maxwell3d_,
|
||||
PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxwell3D& maxwell3d_,
|
||||
Tegra::Engines::KeplerCompute& kepler_compute_,
|
||||
Tegra::MemoryManager& gpu_memory_, const Device& device_,
|
||||
VKScheduler& scheduler_, DescriptorPool& descriptor_pool_,
|
||||
VKUpdateDescriptorQueue& update_descriptor_queue_,
|
||||
RenderPassCache& render_pass_cache_, BufferCache& buffer_cache_,
|
||||
TextureCache& texture_cache_)
|
||||
: VideoCommon::ShaderCache<ShaderInfo>{rasterizer_}, gpu{gpu_}, maxwell3d{maxwell3d_},
|
||||
kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, device{device_},
|
||||
scheduler{scheduler_}, descriptor_pool{descriptor_pool_},
|
||||
: VideoCommon::ShaderCache{rasterizer_, gpu_memory_, maxwell3d_, kepler_compute_},
|
||||
device{device_}, scheduler{scheduler_}, descriptor_pool{descriptor_pool_},
|
||||
update_descriptor_queue{update_descriptor_queue_}, render_pass_cache{render_pass_cache_},
|
||||
buffer_cache{buffer_cache_}, texture_cache{texture_cache_},
|
||||
workers(std::max(std::thread::hardware_concurrency(), 2U) - 1, "yuzu:PipelineBuilder"),
|
||||
|
@ -700,7 +163,7 @@ PipelineCache::~PipelineCache() = default;
|
|||
GraphicsPipeline* PipelineCache::CurrentGraphicsPipeline() {
|
||||
MICROPROFILE_SCOPE(Vulkan_PipelineCache);
|
||||
|
||||
if (!RefreshStages()) {
|
||||
if (!RefreshStages(graphics_key.unique_hashes)) {
|
||||
current_pipeline = nullptr;
|
||||
return nullptr;
|
||||
}
|
||||
|
@ -728,21 +191,14 @@ GraphicsPipeline* PipelineCache::CurrentGraphicsPipeline() {
|
|||
ComputePipeline* PipelineCache::CurrentComputePipeline() {
|
||||
MICROPROFILE_SCOPE(Vulkan_PipelineCache);
|
||||
|
||||
const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()};
|
||||
const auto& qmd{kepler_compute.launch_description};
|
||||
const GPUVAddr shader_addr{program_base + qmd.program_start};
|
||||
const std::optional<VAddr> cpu_shader_addr{gpu_memory.GpuToCpuAddress(shader_addr)};
|
||||
if (!cpu_shader_addr) {
|
||||
const ShaderInfo* const shader{ComputeShader()};
|
||||
if (!shader) {
|
||||
return nullptr;
|
||||
}
|
||||
const ShaderInfo* shader{TryGet(*cpu_shader_addr)};
|
||||
if (!shader) {
|
||||
ComputeEnvironment env{kepler_compute, gpu_memory, program_base, qmd.program_start};
|
||||
shader = MakeShaderInfo(env, *cpu_shader_addr);
|
||||
}
|
||||
const auto& qmd{kepler_compute.launch_description};
|
||||
const ComputePipelineCacheKey key{
|
||||
.unique_hash{shader->unique_hash},
|
||||
.shared_memory_size{qmd.shared_alloc},
|
||||
.unique_hash = shader->unique_hash,
|
||||
.shared_memory_size = qmd.shared_alloc,
|
||||
.workgroup_size{qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z},
|
||||
};
|
||||
const auto [pair, is_new]{compute_cache.try_emplace(key)};
|
||||
|
@ -754,58 +210,75 @@ ComputePipeline* PipelineCache::CurrentComputePipeline() {
|
|||
return pipeline.get();
|
||||
}
|
||||
|
||||
bool PipelineCache::RefreshStages() {
|
||||
auto& dirty{maxwell3d.dirty.flags};
|
||||
if (!dirty[VideoCommon::Dirty::Shaders]) {
|
||||
return last_valid_shaders;
|
||||
void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading,
|
||||
const VideoCore::DiskResourceLoadCallback& callback) {
|
||||
if (title_id == 0) {
|
||||
return;
|
||||
}
|
||||
dirty[VideoCommon::Dirty::Shaders] = false;
|
||||
auto shader_dir{Common::FS::GetYuzuPath(Common::FS::YuzuPath::ShaderDir)};
|
||||
auto base_dir{shader_dir / "vulkan"};
|
||||
auto transferable_dir{base_dir / "transferable"};
|
||||
auto precompiled_dir{base_dir / "precompiled"};
|
||||
if (!Common::FS::CreateDir(shader_dir) || !Common::FS::CreateDir(base_dir) ||
|
||||
!Common::FS::CreateDir(transferable_dir) || !Common::FS::CreateDir(precompiled_dir)) {
|
||||
LOG_ERROR(Common_Filesystem, "Failed to create pipeline cache directories");
|
||||
return;
|
||||
}
|
||||
pipeline_cache_filename = transferable_dir / fmt::format("{:016x}.bin", title_id);
|
||||
|
||||
const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()};
|
||||
for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
|
||||
if (!maxwell3d.regs.IsShaderConfigEnabled(index)) {
|
||||
graphics_key.unique_hashes[index] = u128{};
|
||||
continue;
|
||||
}
|
||||
const auto& shader_config{maxwell3d.regs.shader_config[index]};
|
||||
const auto program{static_cast<Maxwell::ShaderProgram>(index)};
|
||||
const GPUVAddr shader_addr{base_addr + shader_config.offset};
|
||||
const std::optional<VAddr> cpu_shader_addr{gpu_memory.GpuToCpuAddress(shader_addr)};
|
||||
if (!cpu_shader_addr) {
|
||||
LOG_ERROR(Render_Vulkan, "Invalid GPU address for shader 0x{:016x}", shader_addr);
|
||||
last_valid_shaders = false;
|
||||
return false;
|
||||
}
|
||||
const ShaderInfo* shader_info{TryGet(*cpu_shader_addr)};
|
||||
if (!shader_info) {
|
||||
const u32 start_address{shader_config.offset};
|
||||
GraphicsEnvironment env{maxwell3d, gpu_memory, program, base_addr, start_address};
|
||||
shader_info = MakeShaderInfo(env, *cpu_shader_addr);
|
||||
}
|
||||
shader_infos[index] = shader_info;
|
||||
graphics_key.unique_hashes[index] = shader_info->unique_hash;
|
||||
}
|
||||
last_valid_shaders = true;
|
||||
return true;
|
||||
}
|
||||
struct {
|
||||
std::mutex mutex;
|
||||
size_t total{0};
|
||||
size_t built{0};
|
||||
bool has_loaded{false};
|
||||
} state;
|
||||
|
||||
const ShaderInfo* PipelineCache::MakeShaderInfo(GenericEnvironment& env, VAddr cpu_addr) {
|
||||
auto info = std::make_unique<ShaderInfo>();
|
||||
if (const std::optional<u128> cached_hash{env.Analyze()}) {
|
||||
info->unique_hash = *cached_hash;
|
||||
info->size_bytes = env.CachedSize();
|
||||
} else {
|
||||
// Slow path, not really hit on commercial games
|
||||
// Build a control flow graph to get the real shader size
|
||||
main_pools.flow_block.ReleaseContents();
|
||||
Shader::Maxwell::Flow::CFG cfg{env, main_pools.flow_block, env.StartAddress()};
|
||||
info->unique_hash = env.CalculateHash();
|
||||
info->size_bytes = env.ReadSize();
|
||||
}
|
||||
const size_t size_bytes{info->size_bytes};
|
||||
const ShaderInfo* const result{info.get()};
|
||||
Register(std::move(info), cpu_addr, size_bytes);
|
||||
return result;
|
||||
const auto load_compute{[&](std::ifstream& file, FileEnvironment env) {
|
||||
ComputePipelineCacheKey key;
|
||||
file.read(reinterpret_cast<char*>(&key), sizeof(key));
|
||||
|
||||
workers.QueueWork([this, key, env = std::move(env), &state, &callback]() mutable {
|
||||
ShaderPools pools;
|
||||
auto pipeline{CreateComputePipeline(pools, key, env, false)};
|
||||
|
||||
std::lock_guard lock{state.mutex};
|
||||
compute_cache.emplace(key, std::move(pipeline));
|
||||
++state.built;
|
||||
if (state.has_loaded) {
|
||||
callback(VideoCore::LoadCallbackStage::Build, state.built, state.total);
|
||||
}
|
||||
});
|
||||
++state.total;
|
||||
}};
|
||||
const auto load_graphics{[&](std::ifstream& file, std::vector<FileEnvironment> envs) {
|
||||
GraphicsPipelineCacheKey key;
|
||||
file.read(reinterpret_cast<char*>(&key), sizeof(key));
|
||||
|
||||
workers.QueueWork([this, key, envs = std::move(envs), &state, &callback]() mutable {
|
||||
ShaderPools pools;
|
||||
boost::container::static_vector<Shader::Environment*, 5> env_ptrs;
|
||||
for (auto& env : envs) {
|
||||
env_ptrs.push_back(&env);
|
||||
}
|
||||
auto pipeline{CreateGraphicsPipeline(pools, key, MakeSpan(env_ptrs), false)};
|
||||
|
||||
std::lock_guard lock{state.mutex};
|
||||
graphics_cache.emplace(key, std::move(pipeline));
|
||||
++state.built;
|
||||
if (state.has_loaded) {
|
||||
callback(VideoCore::LoadCallbackStage::Build, state.built, state.total);
|
||||
}
|
||||
});
|
||||
++state.total;
|
||||
}};
|
||||
VideoCommon::LoadPipelines(stop_loading, pipeline_cache_filename, load_compute, load_graphics);
|
||||
|
||||
std::unique_lock lock{state.mutex};
|
||||
callback(VideoCore::LoadCallbackStage::Build, 0, state.total);
|
||||
state.has_loaded = true;
|
||||
lock.unlock();
|
||||
|
||||
workers.WaitForRequests();
|
||||
}
|
||||
|
||||
std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline(
|
||||
|
@ -815,7 +288,7 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline(
|
|||
size_t env_index{0};
|
||||
std::array<Shader::IR::Program, Maxwell::MaxShaderProgram> programs;
|
||||
for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
|
||||
if (key.unique_hashes[index] == u128{}) {
|
||||
if (key.unique_hashes[index] == 0) {
|
||||
continue;
|
||||
}
|
||||
Shader::Environment& env{*envs[env_index]};
|
||||
|
@ -830,7 +303,7 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline(
|
|||
|
||||
u32 binding{0};
|
||||
for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
|
||||
if (key.unique_hashes[index] == u128{}) {
|
||||
if (key.unique_hashes[index] == 0) {
|
||||
continue;
|
||||
}
|
||||
UNIMPLEMENTED_IF(index == 0);
|
||||
|
@ -844,8 +317,7 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline(
|
|||
device.SaveShader(code);
|
||||
modules[stage_index] = BuildShader(device, code);
|
||||
if (device.HasDebuggingToolAttached()) {
|
||||
const std::string name{fmt::format("{:016x}{:016x}", key.unique_hashes[index][0],
|
||||
key.unique_hashes[index][1])};
|
||||
const std::string name{fmt::format("{:016x}", key.unique_hashes[index])};
|
||||
modules[stage_index].SetObjectNameEXT(name.c_str());
|
||||
}
|
||||
}
|
||||
|
@ -863,7 +335,7 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline() {
|
|||
|
||||
const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()};
|
||||
for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
|
||||
if (graphics_key.unique_hashes[index] == u128{}) {
|
||||
if (graphics_key.unique_hashes[index] == 0) {
|
||||
continue;
|
||||
}
|
||||
const auto program{static_cast<Maxwell::ShaderProgram>(index)};
|
||||
|
@ -871,7 +343,6 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline() {
|
|||
const u32 start_address{maxwell3d.regs.shader_config[index].offset};
|
||||
env = GraphicsEnvironment{maxwell3d, gpu_memory, program, base_addr, start_address};
|
||||
env.SetCachedSize(shader_infos[index]->size_bytes);
|
||||
|
||||
envs.push_back(&env);
|
||||
}
|
||||
auto pipeline{CreateGraphicsPipeline(main_pools, graphics_key, MakeSpan(envs), true)};
|
||||
|
@ -882,11 +353,11 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline() {
|
|||
boost::container::static_vector<const GenericEnvironment*, Maxwell::MaxShaderProgram>
|
||||
env_ptrs;
|
||||
for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
|
||||
if (key.unique_hashes[index] != u128{}) {
|
||||
if (key.unique_hashes[index] != 0) {
|
||||
env_ptrs.push_back(&envs[index]);
|
||||
}
|
||||
}
|
||||
SerializePipeline(key, env_ptrs, pipeline_cache_filename);
|
||||
VideoCommon::SerializePipeline(key, env_ptrs, pipeline_cache_filename);
|
||||
});
|
||||
return pipeline;
|
||||
}
|
||||
|
@ -902,8 +373,8 @@ std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline(
|
|||
auto pipeline{CreateComputePipeline(main_pools, key, env, true)};
|
||||
if (!pipeline_cache_filename.empty()) {
|
||||
serialization_thread.QueueWork([this, key, env = std::move(env)] {
|
||||
SerializePipeline(key, std::array<const GenericEnvironment*, 1>{&env},
|
||||
pipeline_cache_filename);
|
||||
VideoCommon::SerializePipeline(key, std::array<const GenericEnvironment*, 1>{&env},
|
||||
pipeline_cache_filename);
|
||||
});
|
||||
}
|
||||
return pipeline;
|
||||
|
@ -921,7 +392,7 @@ std::unique_ptr<ComputePipeline> PipelineCache::CreateComputePipeline(
|
|||
device.SaveShader(code);
|
||||
vk::ShaderModule spv_module{BuildShader(device, code)};
|
||||
if (device.HasDebuggingToolAttached()) {
|
||||
const auto name{fmt::format("{:016x}{:016x}", key.unique_hash[0], key.unique_hash[1])};
|
||||
const auto name{fmt::format("{:016x}", key.unique_hash)};
|
||||
spv_module.SetObjectNameEXT(name.c_str());
|
||||
}
|
||||
Common::ThreadWorker* const thread_worker{build_in_parallel ? &workers : nullptr};
|
||||
|
@ -1035,7 +506,7 @@ Shader::Profile PipelineCache::MakeProfile(const GraphicsPipelineCacheKey& key,
|
|||
Shader::Profile profile{base_profile};
|
||||
|
||||
const Shader::Stage stage{program.stage};
|
||||
const bool has_geometry{key.unique_hashes[4] != u128{}};
|
||||
const bool has_geometry{key.unique_hashes[4] != 0};
|
||||
const bool gl_ndc{key.state.ndc_minus_one_to_one != 0};
|
||||
const float point_size{Common::BitCast<float>(key.state.point_size)};
|
||||
switch (stage) {
|
||||
|
|
|
@ -6,6 +6,7 @@
|
|||
|
||||
#include <array>
|
||||
#include <cstddef>
|
||||
#include <filesystem>
|
||||
#include <iosfwd>
|
||||
#include <memory>
|
||||
#include <type_traits>
|
||||
|
@ -42,7 +43,7 @@ namespace Vulkan {
|
|||
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
|
||||
|
||||
struct ComputePipelineCacheKey {
|
||||
u128 unique_hash;
|
||||
u64 unique_hash;
|
||||
u32 shared_memory_size;
|
||||
std::array<u32, 3> workgroup_size;
|
||||
|
||||
|
@ -76,16 +77,12 @@ namespace Vulkan {
|
|||
class ComputePipeline;
|
||||
class Device;
|
||||
class DescriptorPool;
|
||||
class GenericEnvironment;
|
||||
class RasterizerVulkan;
|
||||
class RenderPassCache;
|
||||
class VKScheduler;
|
||||
class VKUpdateDescriptorQueue;
|
||||
|
||||
struct ShaderInfo {
|
||||
u128 unique_hash{};
|
||||
size_t size_bytes{};
|
||||
};
|
||||
using VideoCommon::ShaderInfo;
|
||||
|
||||
struct ShaderPools {
|
||||
void ReleaseContents() {
|
||||
|
@ -99,17 +96,16 @@ struct ShaderPools {
|
|||
Shader::ObjectPool<Shader::Maxwell::Flow::Block> flow_block;
|
||||
};
|
||||
|
||||
class PipelineCache final : public VideoCommon::ShaderCache<ShaderInfo> {
|
||||
class PipelineCache : public VideoCommon::ShaderCache {
|
||||
public:
|
||||
explicit PipelineCache(RasterizerVulkan& rasterizer, Tegra::GPU& gpu,
|
||||
Tegra::Engines::Maxwell3D& maxwell3d,
|
||||
explicit PipelineCache(RasterizerVulkan& rasterizer, Tegra::Engines::Maxwell3D& maxwell3d,
|
||||
Tegra::Engines::KeplerCompute& kepler_compute,
|
||||
Tegra::MemoryManager& gpu_memory, const Device& device,
|
||||
VKScheduler& scheduler, DescriptorPool& descriptor_pool,
|
||||
VKUpdateDescriptorQueue& update_descriptor_queue,
|
||||
RenderPassCache& render_pass_cache, BufferCache& buffer_cache,
|
||||
TextureCache& texture_cache);
|
||||
~PipelineCache() override;
|
||||
~PipelineCache();
|
||||
|
||||
[[nodiscard]] GraphicsPipeline* CurrentGraphicsPipeline();
|
||||
|
||||
|
@ -119,10 +115,6 @@ public:
|
|||
const VideoCore::DiskResourceLoadCallback& callback);
|
||||
|
||||
private:
|
||||
bool RefreshStages();
|
||||
|
||||
const ShaderInfo* MakeShaderInfo(GenericEnvironment& env, VAddr cpu_addr);
|
||||
|
||||
std::unique_ptr<GraphicsPipeline> CreateGraphicsPipeline();
|
||||
|
||||
std::unique_ptr<GraphicsPipeline> CreateGraphicsPipeline(
|
||||
|
@ -140,11 +132,6 @@ private:
|
|||
Shader::Profile MakeProfile(const GraphicsPipelineCacheKey& key,
|
||||
const Shader::IR::Program& program);
|
||||
|
||||
Tegra::GPU& gpu;
|
||||
Tegra::Engines::Maxwell3D& maxwell3d;
|
||||
Tegra::Engines::KeplerCompute& kepler_compute;
|
||||
Tegra::MemoryManager& gpu_memory;
|
||||
|
||||
const Device& device;
|
||||
VKScheduler& scheduler;
|
||||
DescriptorPool& descriptor_pool;
|
||||
|
@ -156,16 +143,13 @@ private:
|
|||
GraphicsPipelineCacheKey graphics_key{};
|
||||
GraphicsPipeline* current_pipeline{};
|
||||
|
||||
std::array<const ShaderInfo*, 6> shader_infos{};
|
||||
bool last_valid_shaders{};
|
||||
|
||||
std::unordered_map<ComputePipelineCacheKey, std::unique_ptr<ComputePipeline>> compute_cache;
|
||||
std::unordered_map<GraphicsPipelineCacheKey, std::unique_ptr<GraphicsPipeline>> graphics_cache;
|
||||
|
||||
ShaderPools main_pools;
|
||||
|
||||
Shader::Profile base_profile;
|
||||
std::string pipeline_cache_filename;
|
||||
std::filesystem::path pipeline_cache_filename;
|
||||
|
||||
Common::ThreadWorker workers;
|
||||
Common::ThreadWorker serialization_thread;
|
||||
|
|
|
@ -149,7 +149,7 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra
|
|||
buffer_cache_runtime(device, memory_allocator, scheduler, staging_pool,
|
||||
update_descriptor_queue, descriptor_pool),
|
||||
buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime),
|
||||
pipeline_cache(*this, gpu, maxwell3d, kepler_compute, gpu_memory, device, scheduler,
|
||||
pipeline_cache(*this, maxwell3d, kepler_compute, gpu_memory, device, scheduler,
|
||||
descriptor_pool, update_descriptor_queue, render_pass_cache, buffer_cache,
|
||||
texture_cache),
|
||||
query_cache{*this, maxwell3d, gpu_memory, device, scheduler}, accelerate_dma{ buffer_cache },
|
||||
|
|
233
src/video_core/shader_cache.cpp
Normal file
233
src/video_core/shader_cache.cpp
Normal file
|
@ -0,0 +1,233 @@
|
|||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <vector>
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "shader_recompiler/frontend/maxwell/control_flow.h"
|
||||
#include "shader_recompiler/object_pool.h"
|
||||
#include "video_core/dirty_flags.h"
|
||||
#include "video_core/engines/kepler_compute.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/memory_manager.h"
|
||||
#include "video_core/shader_cache.h"
|
||||
#include "video_core/shader_environment.h"
|
||||
|
||||
namespace VideoCommon {
|
||||
|
||||
void ShaderCache::InvalidateRegion(VAddr addr, size_t size) {
|
||||
std::scoped_lock lock{invalidation_mutex};
|
||||
InvalidatePagesInRegion(addr, size);
|
||||
RemovePendingShaders();
|
||||
}
|
||||
|
||||
void ShaderCache::OnCPUWrite(VAddr addr, size_t size) {
|
||||
std::lock_guard lock{invalidation_mutex};
|
||||
InvalidatePagesInRegion(addr, size);
|
||||
}
|
||||
|
||||
void ShaderCache::SyncGuestHost() {
|
||||
std::scoped_lock lock{invalidation_mutex};
|
||||
RemovePendingShaders();
|
||||
}
|
||||
|
||||
ShaderCache::ShaderCache(VideoCore::RasterizerInterface& rasterizer_,
|
||||
Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_,
|
||||
Tegra::Engines::KeplerCompute& kepler_compute_)
|
||||
: gpu_memory{gpu_memory_}, maxwell3d{maxwell3d_}, kepler_compute{kepler_compute_},
|
||||
rasterizer{rasterizer_} {}
|
||||
|
||||
bool ShaderCache::RefreshStages(std::array<u64, 6>& unique_hashes) {
|
||||
auto& dirty{maxwell3d.dirty.flags};
|
||||
if (!dirty[VideoCommon::Dirty::Shaders]) {
|
||||
return last_shaders_valid;
|
||||
}
|
||||
dirty[VideoCommon::Dirty::Shaders] = false;
|
||||
|
||||
const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()};
|
||||
for (size_t index = 0; index < Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram; ++index) {
|
||||
if (!maxwell3d.regs.IsShaderConfigEnabled(index)) {
|
||||
unique_hashes[index] = 0;
|
||||
continue;
|
||||
}
|
||||
const auto& shader_config{maxwell3d.regs.shader_config[index]};
|
||||
const auto program{static_cast<Tegra::Engines::Maxwell3D::Regs::ShaderProgram>(index)};
|
||||
const GPUVAddr shader_addr{base_addr + shader_config.offset};
|
||||
const std::optional<VAddr> cpu_shader_addr{gpu_memory.GpuToCpuAddress(shader_addr)};
|
||||
if (!cpu_shader_addr) {
|
||||
LOG_ERROR(HW_GPU, "Invalid GPU address for shader 0x{:016x}", shader_addr);
|
||||
last_shaders_valid = false;
|
||||
return false;
|
||||
}
|
||||
const ShaderInfo* shader_info{TryGet(*cpu_shader_addr)};
|
||||
if (!shader_info) {
|
||||
const u32 start_address{shader_config.offset};
|
||||
GraphicsEnvironment env{maxwell3d, gpu_memory, program, base_addr, start_address};
|
||||
shader_info = MakeShaderInfo(env, *cpu_shader_addr);
|
||||
}
|
||||
shader_infos[index] = shader_info;
|
||||
unique_hashes[index] = shader_info->unique_hash;
|
||||
}
|
||||
last_shaders_valid = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
const ShaderInfo* ShaderCache::ComputeShader() {
|
||||
const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()};
|
||||
const auto& qmd{kepler_compute.launch_description};
|
||||
const GPUVAddr shader_addr{program_base + qmd.program_start};
|
||||
const std::optional<VAddr> cpu_shader_addr{gpu_memory.GpuToCpuAddress(shader_addr)};
|
||||
if (!cpu_shader_addr) {
|
||||
LOG_ERROR(HW_GPU, "Invalid GPU address for shader 0x{:016x}", shader_addr);
|
||||
return nullptr;
|
||||
}
|
||||
if (const ShaderInfo* const shader = TryGet(*cpu_shader_addr)) {
|
||||
return shader;
|
||||
}
|
||||
ComputeEnvironment env{kepler_compute, gpu_memory, program_base, qmd.program_start};
|
||||
return MakeShaderInfo(env, *cpu_shader_addr);
|
||||
}
|
||||
|
||||
ShaderInfo* ShaderCache::TryGet(VAddr addr) const {
|
||||
std::scoped_lock lock{lookup_mutex};
|
||||
|
||||
const auto it = lookup_cache.find(addr);
|
||||
if (it == lookup_cache.end()) {
|
||||
return nullptr;
|
||||
}
|
||||
return it->second->data;
|
||||
}
|
||||
|
||||
void ShaderCache::Register(std::unique_ptr<ShaderInfo> data, VAddr addr, size_t size) {
|
||||
std::scoped_lock lock{invalidation_mutex, lookup_mutex};
|
||||
|
||||
const VAddr addr_end = addr + size;
|
||||
Entry* const entry = NewEntry(addr, addr_end, data.get());
|
||||
|
||||
const u64 page_end = (addr_end + PAGE_SIZE - 1) >> PAGE_BITS;
|
||||
for (u64 page = addr >> PAGE_BITS; page < page_end; ++page) {
|
||||
invalidation_cache[page].push_back(entry);
|
||||
}
|
||||
|
||||
storage.push_back(std::move(data));
|
||||
|
||||
rasterizer.UpdatePagesCachedCount(addr, size, 1);
|
||||
}
|
||||
|
||||
void ShaderCache::InvalidatePagesInRegion(VAddr addr, size_t size) {
|
||||
const VAddr addr_end = addr + size;
|
||||
const u64 page_end = (addr_end + PAGE_SIZE - 1) >> PAGE_BITS;
|
||||
for (u64 page = addr >> PAGE_BITS; page < page_end; ++page) {
|
||||
auto it = invalidation_cache.find(page);
|
||||
if (it == invalidation_cache.end()) {
|
||||
continue;
|
||||
}
|
||||
InvalidatePageEntries(it->second, addr, addr_end);
|
||||
}
|
||||
}
|
||||
|
||||
void ShaderCache::RemovePendingShaders() {
|
||||
if (marked_for_removal.empty()) {
|
||||
return;
|
||||
}
|
||||
// Remove duplicates
|
||||
std::ranges::sort(marked_for_removal);
|
||||
marked_for_removal.erase(std::unique(marked_for_removal.begin(), marked_for_removal.end()),
|
||||
marked_for_removal.end());
|
||||
|
||||
std::vector<ShaderInfo*> removed_shaders;
|
||||
removed_shaders.reserve(marked_for_removal.size());
|
||||
|
||||
std::scoped_lock lock{lookup_mutex};
|
||||
|
||||
for (Entry* const entry : marked_for_removal) {
|
||||
removed_shaders.push_back(entry->data);
|
||||
|
||||
const auto it = lookup_cache.find(entry->addr_start);
|
||||
ASSERT(it != lookup_cache.end());
|
||||
lookup_cache.erase(it);
|
||||
}
|
||||
marked_for_removal.clear();
|
||||
|
||||
if (!removed_shaders.empty()) {
|
||||
RemoveShadersFromStorage(std::move(removed_shaders));
|
||||
}
|
||||
}
|
||||
|
||||
void ShaderCache::InvalidatePageEntries(std::vector<Entry*>& entries, VAddr addr, VAddr addr_end) {
|
||||
size_t index = 0;
|
||||
while (index < entries.size()) {
|
||||
Entry* const entry = entries[index];
|
||||
if (!entry->Overlaps(addr, addr_end)) {
|
||||
++index;
|
||||
continue;
|
||||
}
|
||||
|
||||
UnmarkMemory(entry);
|
||||
RemoveEntryFromInvalidationCache(entry);
|
||||
marked_for_removal.push_back(entry);
|
||||
}
|
||||
}
|
||||
|
||||
void ShaderCache::RemoveEntryFromInvalidationCache(const Entry* entry) {
|
||||
const u64 page_end = (entry->addr_end + PAGE_SIZE - 1) >> PAGE_BITS;
|
||||
for (u64 page = entry->addr_start >> PAGE_BITS; page < page_end; ++page) {
|
||||
const auto entries_it = invalidation_cache.find(page);
|
||||
ASSERT(entries_it != invalidation_cache.end());
|
||||
std::vector<Entry*>& entries = entries_it->second;
|
||||
|
||||
const auto entry_it = std::ranges::find(entries, entry);
|
||||
ASSERT(entry_it != entries.end());
|
||||
entries.erase(entry_it);
|
||||
}
|
||||
}
|
||||
|
||||
void ShaderCache::UnmarkMemory(Entry* entry) {
|
||||
if (!entry->is_memory_marked) {
|
||||
return;
|
||||
}
|
||||
entry->is_memory_marked = false;
|
||||
|
||||
const VAddr addr = entry->addr_start;
|
||||
const size_t size = entry->addr_end - addr;
|
||||
rasterizer.UpdatePagesCachedCount(addr, size, -1);
|
||||
}
|
||||
|
||||
void ShaderCache::RemoveShadersFromStorage(std::vector<ShaderInfo*> removed_shaders) {
|
||||
// Remove them from the cache
|
||||
std::erase_if(storage, [&removed_shaders](const std::unique_ptr<ShaderInfo>& shader) {
|
||||
return std::ranges::find(removed_shaders, shader.get()) != removed_shaders.end();
|
||||
});
|
||||
}
|
||||
|
||||
ShaderCache::Entry* ShaderCache::NewEntry(VAddr addr, VAddr addr_end, ShaderInfo* data) {
|
||||
auto entry = std::make_unique<Entry>(Entry{addr, addr_end, data});
|
||||
Entry* const entry_pointer = entry.get();
|
||||
|
||||
lookup_cache.emplace(addr, std::move(entry));
|
||||
return entry_pointer;
|
||||
}
|
||||
|
||||
const ShaderInfo* ShaderCache::MakeShaderInfo(GenericEnvironment& env, VAddr cpu_addr) {
|
||||
auto info = std::make_unique<ShaderInfo>();
|
||||
if (const std::optional<u64> cached_hash{env.Analyze()}) {
|
||||
info->unique_hash = *cached_hash;
|
||||
info->size_bytes = env.CachedSize();
|
||||
} else {
|
||||
// Slow path, not really hit on commercial games
|
||||
// Build a control flow graph to get the real shader size
|
||||
Shader::ObjectPool<Shader::Maxwell::Flow::Block> flow_block;
|
||||
Shader::Maxwell::Flow::CFG cfg{env, flow_block, env.StartAddress()};
|
||||
info->unique_hash = env.CalculateHash();
|
||||
info->size_bytes = env.ReadSize();
|
||||
}
|
||||
const size_t size_bytes{info->size_bytes};
|
||||
const ShaderInfo* const result{info.get()};
|
||||
Register(std::move(info), cpu_addr, size_bytes);
|
||||
return result;
|
||||
}
|
||||
|
||||
} // namespace VideoCommon
|
|
@ -4,20 +4,28 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include <algorithm>
|
||||
#include <memory>
|
||||
#include <mutex>
|
||||
#include <unordered_map>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/rasterizer_interface.h"
|
||||
|
||||
namespace Tegra {
|
||||
class MemoryManager;
|
||||
}
|
||||
|
||||
namespace VideoCommon {
|
||||
|
||||
template <class T>
|
||||
class GenericEnvironment;
|
||||
|
||||
struct ShaderInfo {
|
||||
u64 unique_hash{};
|
||||
size_t size_bytes{};
|
||||
};
|
||||
|
||||
class ShaderCache {
|
||||
static constexpr u64 PAGE_BITS = 14;
|
||||
static constexpr u64 PAGE_SIZE = u64(1) << PAGE_BITS;
|
||||
|
@ -25,206 +33,100 @@ class ShaderCache {
|
|||
struct Entry {
|
||||
VAddr addr_start;
|
||||
VAddr addr_end;
|
||||
T* data;
|
||||
ShaderInfo* data;
|
||||
|
||||
bool is_memory_marked = true;
|
||||
|
||||
constexpr bool Overlaps(VAddr start, VAddr end) const noexcept {
|
||||
bool Overlaps(VAddr start, VAddr end) const noexcept {
|
||||
return start < addr_end && addr_start < end;
|
||||
}
|
||||
};
|
||||
|
||||
public:
|
||||
virtual ~ShaderCache() = default;
|
||||
|
||||
/// @brief Removes shaders inside a given region
|
||||
/// @note Checks for ranges
|
||||
/// @param addr Start address of the invalidation
|
||||
/// @param size Number of bytes of the invalidation
|
||||
void InvalidateRegion(VAddr addr, std::size_t size) {
|
||||
std::scoped_lock lock{invalidation_mutex};
|
||||
InvalidatePagesInRegion(addr, size);
|
||||
RemovePendingShaders();
|
||||
}
|
||||
void InvalidateRegion(VAddr addr, size_t size);
|
||||
|
||||
/// @brief Unmarks a memory region as cached and marks it for removal
|
||||
/// @param addr Start address of the CPU write operation
|
||||
/// @param size Number of bytes of the CPU write operation
|
||||
void OnCPUWrite(VAddr addr, std::size_t size) {
|
||||
std::lock_guard lock{invalidation_mutex};
|
||||
InvalidatePagesInRegion(addr, size);
|
||||
}
|
||||
void OnCPUWrite(VAddr addr, size_t size);
|
||||
|
||||
/// @brief Flushes delayed removal operations
|
||||
void SyncGuestHost() {
|
||||
std::scoped_lock lock{invalidation_mutex};
|
||||
RemovePendingShaders();
|
||||
}
|
||||
void SyncGuestHost();
|
||||
|
||||
protected:
|
||||
explicit ShaderCache(VideoCore::RasterizerInterface& rasterizer_,
|
||||
Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_,
|
||||
Tegra::Engines::KeplerCompute& kepler_compute_);
|
||||
|
||||
/// @brief Update the hashes and information of shader stages
|
||||
/// @param unique_hashes Shader hashes to store into when a stage is enabled
|
||||
/// @return True no success, false on error
|
||||
bool RefreshStages(std::array<u64, 6>& unique_hashes);
|
||||
|
||||
/// @brief Returns information about the current compute shader
|
||||
/// @return Pointer to a valid shader, nullptr on error
|
||||
const ShaderInfo* ComputeShader();
|
||||
|
||||
Tegra::MemoryManager& gpu_memory;
|
||||
Tegra::Engines::Maxwell3D& maxwell3d;
|
||||
Tegra::Engines::KeplerCompute& kepler_compute;
|
||||
|
||||
std::array<const ShaderInfo*, 6> shader_infos{};
|
||||
bool last_shaders_valid = false;
|
||||
|
||||
private:
|
||||
/// @brief Tries to obtain a cached shader starting in a given address
|
||||
/// @note Doesn't check for ranges, the given address has to be the start of the shader
|
||||
/// @param addr Start address of the shader, this doesn't cache for region
|
||||
/// @return Pointer to a valid shader, nullptr when nothing is found
|
||||
T* TryGet(VAddr addr) const {
|
||||
std::scoped_lock lock{lookup_mutex};
|
||||
|
||||
const auto it = lookup_cache.find(addr);
|
||||
if (it == lookup_cache.end()) {
|
||||
return nullptr;
|
||||
}
|
||||
return it->second->data;
|
||||
}
|
||||
|
||||
protected:
|
||||
explicit ShaderCache(VideoCore::RasterizerInterface& rasterizer_) : rasterizer{rasterizer_} {}
|
||||
ShaderInfo* TryGet(VAddr addr) const;
|
||||
|
||||
/// @brief Register in the cache a given entry
|
||||
/// @param data Shader to store in the cache
|
||||
/// @param addr Start address of the shader that will be registered
|
||||
/// @param size Size in bytes of the shader
|
||||
void Register(std::unique_ptr<T> data, VAddr addr, std::size_t size) {
|
||||
std::scoped_lock lock{invalidation_mutex, lookup_mutex};
|
||||
void Register(std::unique_ptr<ShaderInfo> data, VAddr addr, size_t size);
|
||||
|
||||
const VAddr addr_end = addr + size;
|
||||
Entry* const entry = NewEntry(addr, addr_end, data.get());
|
||||
|
||||
const u64 page_end = (addr_end + PAGE_SIZE - 1) >> PAGE_BITS;
|
||||
for (u64 page = addr >> PAGE_BITS; page < page_end; ++page) {
|
||||
invalidation_cache[page].push_back(entry);
|
||||
}
|
||||
|
||||
storage.push_back(std::move(data));
|
||||
|
||||
rasterizer.UpdatePagesCachedCount(addr, size, 1);
|
||||
}
|
||||
|
||||
/// @brief Called when a shader is going to be removed
|
||||
/// @param shader Shader that will be removed
|
||||
/// @pre invalidation_cache is locked
|
||||
/// @pre lookup_mutex is locked
|
||||
virtual void OnShaderRemoval([[maybe_unused]] T* shader) {}
|
||||
|
||||
private:
|
||||
/// @brief Invalidate pages in a given region
|
||||
/// @pre invalidation_mutex is locked
|
||||
void InvalidatePagesInRegion(VAddr addr, std::size_t size) {
|
||||
const VAddr addr_end = addr + size;
|
||||
const u64 page_end = (addr_end + PAGE_SIZE - 1) >> PAGE_BITS;
|
||||
for (u64 page = addr >> PAGE_BITS; page < page_end; ++page) {
|
||||
auto it = invalidation_cache.find(page);
|
||||
if (it == invalidation_cache.end()) {
|
||||
continue;
|
||||
}
|
||||
InvalidatePageEntries(it->second, addr, addr_end);
|
||||
}
|
||||
}
|
||||
void InvalidatePagesInRegion(VAddr addr, size_t size);
|
||||
|
||||
/// @brief Remove shaders marked for deletion
|
||||
/// @pre invalidation_mutex is locked
|
||||
void RemovePendingShaders() {
|
||||
if (marked_for_removal.empty()) {
|
||||
return;
|
||||
}
|
||||
// Remove duplicates
|
||||
std::sort(marked_for_removal.begin(), marked_for_removal.end());
|
||||
marked_for_removal.erase(std::unique(marked_for_removal.begin(), marked_for_removal.end()),
|
||||
marked_for_removal.end());
|
||||
|
||||
std::vector<T*> removed_shaders;
|
||||
removed_shaders.reserve(marked_for_removal.size());
|
||||
|
||||
std::scoped_lock lock{lookup_mutex};
|
||||
|
||||
for (Entry* const entry : marked_for_removal) {
|
||||
removed_shaders.push_back(entry->data);
|
||||
|
||||
const auto it = lookup_cache.find(entry->addr_start);
|
||||
ASSERT(it != lookup_cache.end());
|
||||
lookup_cache.erase(it);
|
||||
}
|
||||
marked_for_removal.clear();
|
||||
|
||||
if (!removed_shaders.empty()) {
|
||||
RemoveShadersFromStorage(std::move(removed_shaders));
|
||||
}
|
||||
}
|
||||
void RemovePendingShaders();
|
||||
|
||||
/// @brief Invalidates entries in a given range for the passed page
|
||||
/// @param entries Vector of entries in the page, it will be modified on overlaps
|
||||
/// @param addr Start address of the invalidation
|
||||
/// @param addr_end Non-inclusive end address of the invalidation
|
||||
/// @pre invalidation_mutex is locked
|
||||
void InvalidatePageEntries(std::vector<Entry*>& entries, VAddr addr, VAddr addr_end) {
|
||||
std::size_t index = 0;
|
||||
while (index < entries.size()) {
|
||||
Entry* const entry = entries[index];
|
||||
if (!entry->Overlaps(addr, addr_end)) {
|
||||
++index;
|
||||
continue;
|
||||
}
|
||||
|
||||
UnmarkMemory(entry);
|
||||
RemoveEntryFromInvalidationCache(entry);
|
||||
marked_for_removal.push_back(entry);
|
||||
}
|
||||
}
|
||||
void InvalidatePageEntries(std::vector<Entry*>& entries, VAddr addr, VAddr addr_end);
|
||||
|
||||
/// @brief Removes all references to an entry in the invalidation cache
|
||||
/// @param entry Entry to remove from the invalidation cache
|
||||
/// @pre invalidation_mutex is locked
|
||||
void RemoveEntryFromInvalidationCache(const Entry* entry) {
|
||||
const u64 page_end = (entry->addr_end + PAGE_SIZE - 1) >> PAGE_BITS;
|
||||
for (u64 page = entry->addr_start >> PAGE_BITS; page < page_end; ++page) {
|
||||
const auto entries_it = invalidation_cache.find(page);
|
||||
ASSERT(entries_it != invalidation_cache.end());
|
||||
std::vector<Entry*>& entries = entries_it->second;
|
||||
|
||||
const auto entry_it = std::find(entries.begin(), entries.end(), entry);
|
||||
ASSERT(entry_it != entries.end());
|
||||
entries.erase(entry_it);
|
||||
}
|
||||
}
|
||||
void RemoveEntryFromInvalidationCache(const Entry* entry);
|
||||
|
||||
/// @brief Unmarks an entry from the rasterizer cache
|
||||
/// @param entry Entry to unmark from memory
|
||||
void UnmarkMemory(Entry* entry) {
|
||||
if (!entry->is_memory_marked) {
|
||||
return;
|
||||
}
|
||||
entry->is_memory_marked = false;
|
||||
|
||||
const VAddr addr = entry->addr_start;
|
||||
const std::size_t size = entry->addr_end - addr;
|
||||
rasterizer.UpdatePagesCachedCount(addr, size, -1);
|
||||
}
|
||||
void UnmarkMemory(Entry* entry);
|
||||
|
||||
/// @brief Removes a vector of shaders from a list
|
||||
/// @param removed_shaders Shaders to be removed from the storage
|
||||
/// @pre invalidation_mutex is locked
|
||||
/// @pre lookup_mutex is locked
|
||||
void RemoveShadersFromStorage(std::vector<T*> removed_shaders) {
|
||||
// Notify removals
|
||||
for (T* const shader : removed_shaders) {
|
||||
OnShaderRemoval(shader);
|
||||
}
|
||||
|
||||
// Remove them from the cache
|
||||
const auto is_removed = [&removed_shaders](const std::unique_ptr<T>& shader) {
|
||||
return std::find(removed_shaders.begin(), removed_shaders.end(), shader.get()) !=
|
||||
removed_shaders.end();
|
||||
};
|
||||
std::erase_if(storage, is_removed);
|
||||
}
|
||||
void RemoveShadersFromStorage(std::vector<ShaderInfo*> removed_shaders);
|
||||
|
||||
/// @brief Creates a new entry in the lookup cache and returns its pointer
|
||||
/// @pre lookup_mutex is locked
|
||||
Entry* NewEntry(VAddr addr, VAddr addr_end, T* data) {
|
||||
auto entry = std::make_unique<Entry>(Entry{addr, addr_end, data});
|
||||
Entry* const entry_pointer = entry.get();
|
||||
Entry* NewEntry(VAddr addr, VAddr addr_end, ShaderInfo* data);
|
||||
|
||||
lookup_cache.emplace(addr, std::move(entry));
|
||||
return entry_pointer;
|
||||
}
|
||||
/// @brief Create a new shader entry and register it
|
||||
const ShaderInfo* MakeShaderInfo(GenericEnvironment& env, VAddr cpu_addr);
|
||||
|
||||
VideoCore::RasterizerInterface& rasterizer;
|
||||
|
||||
|
@ -233,7 +135,7 @@ private:
|
|||
|
||||
std::unordered_map<u64, std::unique_ptr<Entry>> lookup_cache;
|
||||
std::unordered_map<u64, std::vector<Entry*>> invalidation_cache;
|
||||
std::vector<std::unique_ptr<T>> storage;
|
||||
std::vector<std::unique_ptr<ShaderInfo>> storage;
|
||||
std::vector<Entry*> marked_for_removal;
|
||||
};
|
||||
|
||||
|
|
453
src/video_core/shader_environment.cpp
Normal file
453
src/video_core/shader_environment.cpp
Normal file
|
@ -0,0 +1,453 @@
|
|||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <filesystem>
|
||||
#include <fstream>
|
||||
#include <memory>
|
||||
#include <optional>
|
||||
#include <utility>
|
||||
|
||||
#include "common/assert.h"
|
||||
#include "common/cityhash.h"
|
||||
#include "common/common_types.h"
|
||||
#include "common/div_ceil.h"
|
||||
#include "common/fs/fs.h"
|
||||
#include "common/logging/log.h"
|
||||
#include "shader_recompiler/environment.h"
|
||||
#include "video_core/memory_manager.h"
|
||||
#include "video_core/shader_environment.h"
|
||||
#include "video_core/textures/texture.h"
|
||||
|
||||
namespace VideoCommon {
|
||||
|
||||
constexpr std::array<char, 8> MAGIC_NUMBER{'y', 'u', 'z', 'u', 'c', 'a', 'c', 'h'};
|
||||
constexpr u32 CACHE_VERSION = 3;
|
||||
|
||||
constexpr size_t INST_SIZE = sizeof(u64);
|
||||
|
||||
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
|
||||
|
||||
static u64 MakeCbufKey(u32 index, u32 offset) {
|
||||
return (static_cast<u64>(index) << 32) | offset;
|
||||
}
|
||||
|
||||
static Shader::TextureType ConvertType(const Tegra::Texture::TICEntry& entry) {
|
||||
switch (entry.texture_type) {
|
||||
case Tegra::Texture::TextureType::Texture1D:
|
||||
return Shader::TextureType::Color1D;
|
||||
case Tegra::Texture::TextureType::Texture2D:
|
||||
case Tegra::Texture::TextureType::Texture2DNoMipmap:
|
||||
return Shader::TextureType::Color2D;
|
||||
case Tegra::Texture::TextureType::Texture3D:
|
||||
return Shader::TextureType::Color3D;
|
||||
case Tegra::Texture::TextureType::TextureCubemap:
|
||||
return Shader::TextureType::ColorCube;
|
||||
case Tegra::Texture::TextureType::Texture1DArray:
|
||||
return Shader::TextureType::ColorArray1D;
|
||||
case Tegra::Texture::TextureType::Texture2DArray:
|
||||
return Shader::TextureType::ColorArray2D;
|
||||
case Tegra::Texture::TextureType::Texture1DBuffer:
|
||||
return Shader::TextureType::Buffer;
|
||||
case Tegra::Texture::TextureType::TextureCubeArray:
|
||||
return Shader::TextureType::ColorArrayCube;
|
||||
default:
|
||||
throw Shader::NotImplementedException("Unknown texture type");
|
||||
}
|
||||
}
|
||||
|
||||
GenericEnvironment::GenericEnvironment(Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_,
|
||||
u32 start_address_)
|
||||
: gpu_memory{&gpu_memory_}, program_base{program_base_} {
|
||||
start_address = start_address_;
|
||||
}
|
||||
|
||||
GenericEnvironment::~GenericEnvironment() = default;
|
||||
|
||||
u32 GenericEnvironment::TextureBoundBuffer() const {
|
||||
return texture_bound;
|
||||
}
|
||||
|
||||
u32 GenericEnvironment::LocalMemorySize() const {
|
||||
return local_memory_size;
|
||||
}
|
||||
|
||||
u32 GenericEnvironment::SharedMemorySize() const {
|
||||
return shared_memory_size;
|
||||
}
|
||||
|
||||
std::array<u32, 3> GenericEnvironment::WorkgroupSize() const {
|
||||
return workgroup_size;
|
||||
}
|
||||
|
||||
u64 GenericEnvironment::ReadInstruction(u32 address) {
|
||||
read_lowest = std::min(read_lowest, address);
|
||||
read_highest = std::max(read_highest, address);
|
||||
|
||||
if (address >= cached_lowest && address < cached_highest) {
|
||||
return code[(address - cached_lowest) / INST_SIZE];
|
||||
}
|
||||
has_unbound_instructions = true;
|
||||
return gpu_memory->Read<u64>(program_base + address);
|
||||
}
|
||||
|
||||
std::optional<u64> GenericEnvironment::Analyze() {
|
||||
const std::optional<u64> size{TryFindSize()};
|
||||
if (!size) {
|
||||
return std::nullopt;
|
||||
}
|
||||
cached_lowest = start_address;
|
||||
cached_highest = start_address + static_cast<u32>(*size);
|
||||
return Common::CityHash64(reinterpret_cast<const char*>(code.data()), *size);
|
||||
}
|
||||
|
||||
void GenericEnvironment::SetCachedSize(size_t size_bytes) {
|
||||
cached_lowest = start_address;
|
||||
cached_highest = start_address + static_cast<u32>(size_bytes);
|
||||
code.resize(CachedSize());
|
||||
gpu_memory->ReadBlock(program_base + cached_lowest, code.data(), code.size() * sizeof(u64));
|
||||
}
|
||||
|
||||
size_t GenericEnvironment::CachedSize() const noexcept {
|
||||
return cached_highest - cached_lowest + INST_SIZE;
|
||||
}
|
||||
|
||||
size_t GenericEnvironment::ReadSize() const noexcept {
|
||||
return read_highest - read_lowest + INST_SIZE;
|
||||
}
|
||||
|
||||
bool GenericEnvironment::CanBeSerialized() const noexcept {
|
||||
return !has_unbound_instructions;
|
||||
}
|
||||
|
||||
u64 GenericEnvironment::CalculateHash() const {
|
||||
const size_t size{ReadSize()};
|
||||
const auto data{std::make_unique<char[]>(size)};
|
||||
gpu_memory->ReadBlock(program_base + read_lowest, data.get(), size);
|
||||
return Common::CityHash64(data.get(), size);
|
||||
}
|
||||
|
||||
void GenericEnvironment::Serialize(std::ofstream& file) const {
|
||||
const u64 code_size{static_cast<u64>(CachedSize())};
|
||||
const u64 num_texture_types{static_cast<u64>(texture_types.size())};
|
||||
const u64 num_cbuf_values{static_cast<u64>(cbuf_values.size())};
|
||||
|
||||
file.write(reinterpret_cast<const char*>(&code_size), sizeof(code_size))
|
||||
.write(reinterpret_cast<const char*>(&num_texture_types), sizeof(num_texture_types))
|
||||
.write(reinterpret_cast<const char*>(&num_cbuf_values), sizeof(num_cbuf_values))
|
||||
.write(reinterpret_cast<const char*>(&local_memory_size), sizeof(local_memory_size))
|
||||
.write(reinterpret_cast<const char*>(&texture_bound), sizeof(texture_bound))
|
||||
.write(reinterpret_cast<const char*>(&start_address), sizeof(start_address))
|
||||
.write(reinterpret_cast<const char*>(&cached_lowest), sizeof(cached_lowest))
|
||||
.write(reinterpret_cast<const char*>(&cached_highest), sizeof(cached_highest))
|
||||
.write(reinterpret_cast<const char*>(&stage), sizeof(stage))
|
||||
.write(reinterpret_cast<const char*>(code.data()), code_size);
|
||||
for (const auto [key, type] : texture_types) {
|
||||
file.write(reinterpret_cast<const char*>(&key), sizeof(key))
|
||||
.write(reinterpret_cast<const char*>(&type), sizeof(type));
|
||||
}
|
||||
for (const auto [key, type] : cbuf_values) {
|
||||
file.write(reinterpret_cast<const char*>(&key), sizeof(key))
|
||||
.write(reinterpret_cast<const char*>(&type), sizeof(type));
|
||||
}
|
||||
if (stage == Shader::Stage::Compute) {
|
||||
file.write(reinterpret_cast<const char*>(&workgroup_size), sizeof(workgroup_size))
|
||||
.write(reinterpret_cast<const char*>(&shared_memory_size), sizeof(shared_memory_size));
|
||||
} else {
|
||||
file.write(reinterpret_cast<const char*>(&sph), sizeof(sph));
|
||||
}
|
||||
}
|
||||
|
||||
std::optional<u64> GenericEnvironment::TryFindSize() {
|
||||
static constexpr size_t BLOCK_SIZE = 0x1000;
|
||||
static constexpr size_t MAXIMUM_SIZE = 0x100000;
|
||||
|
||||
static constexpr u64 SELF_BRANCH_A = 0xE2400FFFFF87000FULL;
|
||||
static constexpr u64 SELF_BRANCH_B = 0xE2400FFFFF07000FULL;
|
||||
|
||||
GPUVAddr guest_addr{program_base + start_address};
|
||||
size_t offset{0};
|
||||
size_t size{BLOCK_SIZE};
|
||||
while (size <= MAXIMUM_SIZE) {
|
||||
code.resize(size / INST_SIZE);
|
||||
u64* const data = code.data() + offset / INST_SIZE;
|
||||
gpu_memory->ReadBlock(guest_addr, data, BLOCK_SIZE);
|
||||
for (size_t index = 0; index < BLOCK_SIZE; index += INST_SIZE) {
|
||||
const u64 inst = data[index / INST_SIZE];
|
||||
if (inst == SELF_BRANCH_A || inst == SELF_BRANCH_B) {
|
||||
return offset + index;
|
||||
}
|
||||
}
|
||||
guest_addr += BLOCK_SIZE;
|
||||
size += BLOCK_SIZE;
|
||||
offset += BLOCK_SIZE;
|
||||
}
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
Shader::TextureType GenericEnvironment::ReadTextureTypeImpl(GPUVAddr tic_addr, u32 tic_limit,
|
||||
bool via_header_index, u32 raw) {
|
||||
const TextureHandle handle{raw, via_header_index};
|
||||
const GPUVAddr descriptor_addr{tic_addr + handle.image * sizeof(Tegra::Texture::TICEntry)};
|
||||
Tegra::Texture::TICEntry entry;
|
||||
gpu_memory->ReadBlock(descriptor_addr, &entry, sizeof(entry));
|
||||
const Shader::TextureType result{ConvertType(entry)};
|
||||
texture_types.emplace(raw, result);
|
||||
return result;
|
||||
}
|
||||
|
||||
GraphicsEnvironment::GraphicsEnvironment(Tegra::Engines::Maxwell3D& maxwell3d_,
|
||||
Tegra::MemoryManager& gpu_memory_,
|
||||
Maxwell::ShaderProgram program, GPUVAddr program_base_,
|
||||
u32 start_address_)
|
||||
: GenericEnvironment{gpu_memory_, program_base_, start_address_}, maxwell3d{&maxwell3d_} {
|
||||
gpu_memory->ReadBlock(program_base + start_address, &sph, sizeof(sph));
|
||||
switch (program) {
|
||||
case Maxwell::ShaderProgram::VertexA:
|
||||
stage = Shader::Stage::VertexA;
|
||||
stage_index = 0;
|
||||
break;
|
||||
case Maxwell::ShaderProgram::VertexB:
|
||||
stage = Shader::Stage::VertexB;
|
||||
stage_index = 0;
|
||||
break;
|
||||
case Maxwell::ShaderProgram::TesselationControl:
|
||||
stage = Shader::Stage::TessellationControl;
|
||||
stage_index = 1;
|
||||
break;
|
||||
case Maxwell::ShaderProgram::TesselationEval:
|
||||
stage = Shader::Stage::TessellationEval;
|
||||
stage_index = 2;
|
||||
break;
|
||||
case Maxwell::ShaderProgram::Geometry:
|
||||
stage = Shader::Stage::Geometry;
|
||||
stage_index = 3;
|
||||
break;
|
||||
case Maxwell::ShaderProgram::Fragment:
|
||||
stage = Shader::Stage::Fragment;
|
||||
stage_index = 4;
|
||||
break;
|
||||
default:
|
||||
UNREACHABLE_MSG("Invalid program={}", program);
|
||||
break;
|
||||
}
|
||||
const u64 local_size{sph.LocalMemorySize()};
|
||||
ASSERT(local_size <= std::numeric_limits<u32>::max());
|
||||
local_memory_size = static_cast<u32>(local_size);
|
||||
texture_bound = maxwell3d->regs.tex_cb_index;
|
||||
}
|
||||
|
||||
u32 GraphicsEnvironment::ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) {
|
||||
const auto& cbuf{maxwell3d->state.shader_stages[stage_index].const_buffers[cbuf_index]};
|
||||
ASSERT(cbuf.enabled);
|
||||
u32 value{};
|
||||
if (cbuf_offset < cbuf.size) {
|
||||
value = gpu_memory->Read<u32>(cbuf.address + cbuf_offset);
|
||||
}
|
||||
cbuf_values.emplace(MakeCbufKey(cbuf_index, cbuf_offset), value);
|
||||
return value;
|
||||
}
|
||||
|
||||
Shader::TextureType GraphicsEnvironment::ReadTextureType(u32 handle) {
|
||||
const auto& regs{maxwell3d->regs};
|
||||
const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex};
|
||||
return ReadTextureTypeImpl(regs.tic.Address(), regs.tic.limit, via_header_index, handle);
|
||||
}
|
||||
|
||||
ComputeEnvironment::ComputeEnvironment(Tegra::Engines::KeplerCompute& kepler_compute_,
|
||||
Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_,
|
||||
u32 start_address_)
|
||||
: GenericEnvironment{gpu_memory_, program_base_, start_address_}, kepler_compute{
|
||||
&kepler_compute_} {
|
||||
const auto& qmd{kepler_compute->launch_description};
|
||||
stage = Shader::Stage::Compute;
|
||||
local_memory_size = qmd.local_pos_alloc;
|
||||
texture_bound = kepler_compute->regs.tex_cb_index;
|
||||
shared_memory_size = qmd.shared_alloc;
|
||||
workgroup_size = {qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z};
|
||||
}
|
||||
|
||||
u32 ComputeEnvironment::ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) {
|
||||
const auto& qmd{kepler_compute->launch_description};
|
||||
ASSERT(((qmd.const_buffer_enable_mask.Value() >> cbuf_index) & 1) != 0);
|
||||
const auto& cbuf{qmd.const_buffer_config[cbuf_index]};
|
||||
u32 value{};
|
||||
if (cbuf_offset < cbuf.size) {
|
||||
value = gpu_memory->Read<u32>(cbuf.Address() + cbuf_offset);
|
||||
}
|
||||
cbuf_values.emplace(MakeCbufKey(cbuf_index, cbuf_offset), value);
|
||||
return value;
|
||||
}
|
||||
|
||||
Shader::TextureType ComputeEnvironment::ReadTextureType(u32 handle) {
|
||||
const auto& regs{kepler_compute->regs};
|
||||
const auto& qmd{kepler_compute->launch_description};
|
||||
return ReadTextureTypeImpl(regs.tic.Address(), regs.tic.limit, qmd.linked_tsc != 0, handle);
|
||||
}
|
||||
|
||||
void FileEnvironment::Deserialize(std::ifstream& file) {
|
||||
u64 code_size{};
|
||||
u64 num_texture_types{};
|
||||
u64 num_cbuf_values{};
|
||||
file.read(reinterpret_cast<char*>(&code_size), sizeof(code_size))
|
||||
.read(reinterpret_cast<char*>(&num_texture_types), sizeof(num_texture_types))
|
||||
.read(reinterpret_cast<char*>(&num_cbuf_values), sizeof(num_cbuf_values))
|
||||
.read(reinterpret_cast<char*>(&local_memory_size), sizeof(local_memory_size))
|
||||
.read(reinterpret_cast<char*>(&texture_bound), sizeof(texture_bound))
|
||||
.read(reinterpret_cast<char*>(&start_address), sizeof(start_address))
|
||||
.read(reinterpret_cast<char*>(&read_lowest), sizeof(read_lowest))
|
||||
.read(reinterpret_cast<char*>(&read_highest), sizeof(read_highest))
|
||||
.read(reinterpret_cast<char*>(&stage), sizeof(stage));
|
||||
code = std::make_unique<u64[]>(Common::DivCeil(code_size, sizeof(u64)));
|
||||
file.read(reinterpret_cast<char*>(code.get()), code_size);
|
||||
for (size_t i = 0; i < num_texture_types; ++i) {
|
||||
u32 key;
|
||||
Shader::TextureType type;
|
||||
file.read(reinterpret_cast<char*>(&key), sizeof(key))
|
||||
.read(reinterpret_cast<char*>(&type), sizeof(type));
|
||||
texture_types.emplace(key, type);
|
||||
}
|
||||
for (size_t i = 0; i < num_cbuf_values; ++i) {
|
||||
u64 key;
|
||||
u32 value;
|
||||
file.read(reinterpret_cast<char*>(&key), sizeof(key))
|
||||
.read(reinterpret_cast<char*>(&value), sizeof(value));
|
||||
cbuf_values.emplace(key, value);
|
||||
}
|
||||
if (stage == Shader::Stage::Compute) {
|
||||
file.read(reinterpret_cast<char*>(&workgroup_size), sizeof(workgroup_size))
|
||||
.read(reinterpret_cast<char*>(&shared_memory_size), sizeof(shared_memory_size));
|
||||
} else {
|
||||
file.read(reinterpret_cast<char*>(&sph), sizeof(sph));
|
||||
}
|
||||
}
|
||||
|
||||
u64 FileEnvironment::ReadInstruction(u32 address) {
|
||||
if (address < read_lowest || address > read_highest) {
|
||||
throw Shader::LogicError("Out of bounds address {}", address);
|
||||
}
|
||||
return code[(address - read_lowest) / sizeof(u64)];
|
||||
}
|
||||
|
||||
u32 FileEnvironment::ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) {
|
||||
const auto it{cbuf_values.find(MakeCbufKey(cbuf_index, cbuf_offset))};
|
||||
if (it == cbuf_values.end()) {
|
||||
throw Shader::LogicError("Uncached read texture type");
|
||||
}
|
||||
return it->second;
|
||||
}
|
||||
|
||||
Shader::TextureType FileEnvironment::ReadTextureType(u32 handle) {
|
||||
const auto it{texture_types.find(handle)};
|
||||
if (it == texture_types.end()) {
|
||||
throw Shader::LogicError("Uncached read texture type");
|
||||
}
|
||||
return it->second;
|
||||
}
|
||||
|
||||
u32 FileEnvironment::LocalMemorySize() const {
|
||||
return local_memory_size;
|
||||
}
|
||||
|
||||
u32 FileEnvironment::SharedMemorySize() const {
|
||||
return shared_memory_size;
|
||||
}
|
||||
|
||||
u32 FileEnvironment::TextureBoundBuffer() const {
|
||||
return texture_bound;
|
||||
}
|
||||
|
||||
std::array<u32, 3> FileEnvironment::WorkgroupSize() const {
|
||||
return workgroup_size;
|
||||
}
|
||||
|
||||
void SerializePipeline(std::span<const char> key, std::span<const GenericEnvironment* const> envs,
|
||||
const std::filesystem::path& filename) try {
|
||||
std::ofstream file(filename, std::ios::binary | std::ios::ate | std::ios::app);
|
||||
file.exceptions(std::ifstream::failbit);
|
||||
if (!file.is_open()) {
|
||||
LOG_ERROR(Common_Filesystem, "Failed to open pipeline cache file {}",
|
||||
Common::FS::PathToUTF8String(filename));
|
||||
return;
|
||||
}
|
||||
if (file.tellp() == 0) {
|
||||
// Write header
|
||||
file.write(MAGIC_NUMBER.data(), MAGIC_NUMBER.size())
|
||||
.write(reinterpret_cast<const char*>(&CACHE_VERSION), sizeof(CACHE_VERSION));
|
||||
}
|
||||
if (!std::ranges::all_of(envs, &GenericEnvironment::CanBeSerialized)) {
|
||||
return;
|
||||
}
|
||||
const u32 num_envs{static_cast<u32>(envs.size())};
|
||||
file.write(reinterpret_cast<const char*>(&num_envs), sizeof(num_envs));
|
||||
for (const GenericEnvironment* const env : envs) {
|
||||
env->Serialize(file);
|
||||
}
|
||||
file.write(key.data(), key.size_bytes());
|
||||
|
||||
} catch (const std::ios_base::failure& e) {
|
||||
LOG_ERROR(Common_Filesystem, "{}", e.what());
|
||||
if (!Common::FS::RemoveFile(filename)) {
|
||||
LOG_ERROR(Common_Filesystem, "Failed to delete pipeline cache file {}",
|
||||
Common::FS::PathToUTF8String(filename));
|
||||
}
|
||||
}
|
||||
|
||||
void LoadPipelines(
|
||||
std::stop_token stop_loading, const std::filesystem::path& filename,
|
||||
Common::UniqueFunction<void, std::ifstream&, FileEnvironment> load_compute,
|
||||
Common::UniqueFunction<void, std::ifstream&, std::vector<FileEnvironment>> load_graphics) try {
|
||||
std::ifstream file(filename, std::ios::binary | std::ios::ate);
|
||||
if (!file.is_open()) {
|
||||
return;
|
||||
}
|
||||
file.exceptions(std::ifstream::failbit);
|
||||
const auto end{file.tellg()};
|
||||
file.seekg(0, std::ios::beg);
|
||||
|
||||
std::array<char, 8> magic_number;
|
||||
u32 cache_version;
|
||||
file.read(magic_number.data(), magic_number.size())
|
||||
.read(reinterpret_cast<char*>(&cache_version), sizeof(cache_version));
|
||||
if (magic_number != MAGIC_NUMBER || cache_version != CACHE_VERSION) {
|
||||
file.close();
|
||||
if (Common::FS::RemoveFile(filename)) {
|
||||
if (magic_number != MAGIC_NUMBER) {
|
||||
LOG_ERROR(Common_Filesystem, "Invalid pipeline cache file");
|
||||
}
|
||||
if (cache_version != CACHE_VERSION) {
|
||||
LOG_INFO(Common_Filesystem, "Deleting old pipeline cache");
|
||||
}
|
||||
} else {
|
||||
LOG_ERROR(Common_Filesystem,
|
||||
"Invalid pipeline cache file and failed to delete it in \"{}\"",
|
||||
Common::FS::PathToUTF8String(filename));
|
||||
}
|
||||
return;
|
||||
}
|
||||
while (file.tellg() != end) {
|
||||
if (stop_loading.stop_requested()) {
|
||||
return;
|
||||
}
|
||||
u32 num_envs{};
|
||||
file.read(reinterpret_cast<char*>(&num_envs), sizeof(num_envs));
|
||||
std::vector<FileEnvironment> envs(num_envs);
|
||||
for (FileEnvironment& env : envs) {
|
||||
env.Deserialize(file);
|
||||
}
|
||||
if (envs.front().ShaderStage() == Shader::Stage::Compute) {
|
||||
load_compute(file, std::move(envs.front()));
|
||||
} else {
|
||||
load_graphics(file, std::move(envs));
|
||||
}
|
||||
}
|
||||
|
||||
} catch (const std::ios_base::failure& e) {
|
||||
LOG_ERROR(Common_Filesystem, "{}", e.what());
|
||||
if (!Common::FS::RemoveFile(filename)) {
|
||||
LOG_ERROR(Common_Filesystem, "Failed to delete pipeline cache file {}",
|
||||
Common::FS::PathToUTF8String(filename));
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace VideoCommon
|
198
src/video_core/shader_environment.h
Normal file
198
src/video_core/shader_environment.h
Normal file
|
@ -0,0 +1,198 @@
|
|||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <atomic>
|
||||
#include <filesystem>
|
||||
#include <iosfwd>
|
||||
#include <limits>
|
||||
#include <memory>
|
||||
#include <optional>
|
||||
#include <span>
|
||||
#include <type_traits>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "common/unique_function.h"
|
||||
#include "shader_recompiler/environment.h"
|
||||
#include "video_core/engines/kepler_compute.h"
|
||||
#include "video_core/engines/maxwell_3d.h"
|
||||
#include "video_core/textures/texture.h"
|
||||
|
||||
namespace Tegra {
|
||||
class Memorymanager;
|
||||
}
|
||||
|
||||
namespace VideoCommon {
|
||||
|
||||
struct TextureHandle {
|
||||
explicit TextureHandle(u32 data, bool via_header_index) {
|
||||
if (via_header_index) {
|
||||
image = data;
|
||||
sampler = data;
|
||||
} else {
|
||||
const Tegra::Texture::TextureHandle handle{data};
|
||||
image = handle.tic_id;
|
||||
sampler = via_header_index ? image : handle.tsc_id.Value();
|
||||
}
|
||||
}
|
||||
|
||||
u32 image;
|
||||
u32 sampler;
|
||||
};
|
||||
|
||||
class GenericEnvironment : public Shader::Environment {
|
||||
public:
|
||||
explicit GenericEnvironment() = default;
|
||||
explicit GenericEnvironment(Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_,
|
||||
u32 start_address_);
|
||||
|
||||
~GenericEnvironment() override;
|
||||
|
||||
[[nodiscard]] u32 TextureBoundBuffer() const final;
|
||||
|
||||
[[nodiscard]] u32 LocalMemorySize() const final;
|
||||
|
||||
[[nodiscard]] u32 SharedMemorySize() const final;
|
||||
|
||||
[[nodiscard]] std::array<u32, 3> WorkgroupSize() const final;
|
||||
|
||||
[[nodiscard]] u64 ReadInstruction(u32 address) final;
|
||||
|
||||
[[nodiscard]] std::optional<u64> Analyze();
|
||||
|
||||
void SetCachedSize(size_t size_bytes);
|
||||
|
||||
[[nodiscard]] size_t CachedSize() const noexcept;
|
||||
|
||||
[[nodiscard]] size_t ReadSize() const noexcept;
|
||||
|
||||
[[nodiscard]] bool CanBeSerialized() const noexcept;
|
||||
|
||||
[[nodiscard]] u64 CalculateHash() const;
|
||||
|
||||
void Serialize(std::ofstream& file) const;
|
||||
|
||||
protected:
|
||||
std::optional<u64> TryFindSize();
|
||||
|
||||
Shader::TextureType ReadTextureTypeImpl(GPUVAddr tic_addr, u32 tic_limit, bool via_header_index,
|
||||
u32 raw);
|
||||
|
||||
Tegra::MemoryManager* gpu_memory{};
|
||||
GPUVAddr program_base{};
|
||||
|
||||
std::vector<u64> code;
|
||||
std::unordered_map<u32, Shader::TextureType> texture_types;
|
||||
std::unordered_map<u64, u32> cbuf_values;
|
||||
|
||||
u32 local_memory_size{};
|
||||
u32 texture_bound{};
|
||||
u32 shared_memory_size{};
|
||||
std::array<u32, 3> workgroup_size{};
|
||||
|
||||
u32 read_lowest = std::numeric_limits<u32>::max();
|
||||
u32 read_highest = 0;
|
||||
|
||||
u32 cached_lowest = std::numeric_limits<u32>::max();
|
||||
u32 cached_highest = 0;
|
||||
|
||||
bool has_unbound_instructions = false;
|
||||
};
|
||||
|
||||
class GraphicsEnvironment final : public GenericEnvironment {
|
||||
public:
|
||||
explicit GraphicsEnvironment() = default;
|
||||
explicit GraphicsEnvironment(Tegra::Engines::Maxwell3D& maxwell3d_,
|
||||
Tegra::MemoryManager& gpu_memory_,
|
||||
Tegra::Engines::Maxwell3D::Regs::ShaderProgram program,
|
||||
GPUVAddr program_base_, u32 start_address_);
|
||||
|
||||
~GraphicsEnvironment() override = default;
|
||||
|
||||
u32 ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) override;
|
||||
|
||||
Shader::TextureType ReadTextureType(u32 handle) override;
|
||||
|
||||
private:
|
||||
Tegra::Engines::Maxwell3D* maxwell3d{};
|
||||
size_t stage_index{};
|
||||
};
|
||||
|
||||
class ComputeEnvironment final : public GenericEnvironment {
|
||||
public:
|
||||
explicit ComputeEnvironment() = default;
|
||||
explicit ComputeEnvironment(Tegra::Engines::KeplerCompute& kepler_compute_,
|
||||
Tegra::MemoryManager& gpu_memory_, GPUVAddr program_base_,
|
||||
u32 start_address_);
|
||||
|
||||
~ComputeEnvironment() override = default;
|
||||
|
||||
u32 ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) override;
|
||||
|
||||
Shader::TextureType ReadTextureType(u32 handle) override;
|
||||
|
||||
private:
|
||||
Tegra::Engines::KeplerCompute* kepler_compute{};
|
||||
};
|
||||
|
||||
class FileEnvironment final : public Shader::Environment {
|
||||
public:
|
||||
FileEnvironment() = default;
|
||||
~FileEnvironment() override = default;
|
||||
|
||||
FileEnvironment& operator=(FileEnvironment&&) noexcept = default;
|
||||
FileEnvironment(FileEnvironment&&) noexcept = default;
|
||||
|
||||
FileEnvironment& operator=(const FileEnvironment&) = delete;
|
||||
FileEnvironment(const FileEnvironment&) = delete;
|
||||
|
||||
void Deserialize(std::ifstream& file);
|
||||
|
||||
[[nodiscard]] u64 ReadInstruction(u32 address) override;
|
||||
|
||||
[[nodiscard]] u32 ReadCbufValue(u32 cbuf_index, u32 cbuf_offset) override;
|
||||
|
||||
[[nodiscard]] Shader::TextureType ReadTextureType(u32 handle) override;
|
||||
|
||||
[[nodiscard]] u32 LocalMemorySize() const override;
|
||||
|
||||
[[nodiscard]] u32 SharedMemorySize() const override;
|
||||
|
||||
[[nodiscard]] u32 TextureBoundBuffer() const override;
|
||||
|
||||
[[nodiscard]] std::array<u32, 3> WorkgroupSize() const override;
|
||||
|
||||
private:
|
||||
std::unique_ptr<u64[]> code;
|
||||
std::unordered_map<u32, Shader::TextureType> texture_types;
|
||||
std::unordered_map<u64, u32> cbuf_values;
|
||||
std::array<u32, 3> workgroup_size{};
|
||||
u32 local_memory_size{};
|
||||
u32 shared_memory_size{};
|
||||
u32 texture_bound{};
|
||||
u32 read_lowest{};
|
||||
u32 read_highest{};
|
||||
};
|
||||
|
||||
void SerializePipeline(std::span<const char> key, std::span<const GenericEnvironment* const> envs,
|
||||
const std::filesystem::path& filename);
|
||||
|
||||
template <typename Key, typename Envs>
|
||||
void SerializePipeline(const Key& key, const Envs& envs, const std::filesystem::path& filename) {
|
||||
static_assert(std::is_trivially_copyable_v<Key>);
|
||||
static_assert(std::has_unique_object_representations_v<Key>);
|
||||
SerializePipeline(std::span(reinterpret_cast<const char*>(&key), sizeof(key)),
|
||||
std::span(envs.data(), envs.size()), filename);
|
||||
}
|
||||
|
||||
void LoadPipelines(
|
||||
std::stop_token stop_loading, const std::filesystem::path& filename,
|
||||
Common::UniqueFunction<void, std::ifstream&, FileEnvironment> load_compute,
|
||||
Common::UniqueFunction<void, std::ifstream&, std::vector<FileEnvironment>> load_graphics);
|
||||
|
||||
} // namespace VideoCommon
|
Loading…
Reference in a new issue