Merge pull request #2917 from FernandoS27/fermi-deduction-2
TextureCache: Add the ability to deduce if two textures are depth on blit.
This commit is contained in:
commit
0a662d009b
|
@ -224,8 +224,13 @@ public:
|
||||||
const Tegra::Engines::Fermi2D::Regs::Surface& dst_config,
|
const Tegra::Engines::Fermi2D::Regs::Surface& dst_config,
|
||||||
const Tegra::Engines::Fermi2D::Config& copy_config) {
|
const Tegra::Engines::Fermi2D::Config& copy_config) {
|
||||||
std::lock_guard lock{mutex};
|
std::lock_guard lock{mutex};
|
||||||
std::pair<TSurface, TView> dst_surface = GetFermiSurface(dst_config);
|
SurfaceParams src_params = SurfaceParams::CreateForFermiCopySurface(src_config);
|
||||||
std::pair<TSurface, TView> src_surface = GetFermiSurface(src_config);
|
SurfaceParams dst_params = SurfaceParams::CreateForFermiCopySurface(dst_config);
|
||||||
|
const GPUVAddr src_gpu_addr = src_config.Address();
|
||||||
|
const GPUVAddr dst_gpu_addr = dst_config.Address();
|
||||||
|
DeduceBestBlit(src_params, dst_params, src_gpu_addr, dst_gpu_addr);
|
||||||
|
std::pair<TSurface, TView> dst_surface = GetSurface(dst_gpu_addr, dst_params, true, false);
|
||||||
|
std::pair<TSurface, TView> src_surface = GetSurface(src_gpu_addr, src_params, true, false);
|
||||||
ImageBlit(src_surface.second, dst_surface.second, copy_config);
|
ImageBlit(src_surface.second, dst_surface.second, copy_config);
|
||||||
dst_surface.first->MarkAsModified(true, Tick());
|
dst_surface.first->MarkAsModified(true, Tick());
|
||||||
}
|
}
|
||||||
|
@ -357,6 +362,29 @@ private:
|
||||||
BufferCopy = 3,
|
BufferCopy = 3,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/// Outcome categories reported by `DeduceSurface`.
enum class DeductionType : u32 {
    /// A single cached surface was identified for the candidate.
    DeductionComplete = 0,
    /// No cached surface overlaps the candidate, so nothing could be learned from the cache.
    DeductionIncomplete = 1,
    /// The candidate could not be resolved (invalid address, topology mismatch or
    /// several overlapping surfaces).
    DeductionFailed = 2,
};
|
||||||
|
|
||||||
|
struct Deduction {
|
||||||
|
DeductionType type{DeductionType::DeductionFailed};
|
||||||
|
TSurface surface{};
|
||||||
|
|
||||||
|
bool Failed() const {
|
||||||
|
return type == DeductionType::DeductionFailed;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool Incomplete() const {
|
||||||
|
return type == DeductionType::DeductionIncomplete;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool IsDepth() const {
|
||||||
|
return surface->GetSurfaceParams().IsPixelFormatZeta();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* `PickStrategy` takes care of selecting a proper strategy to deal with a texture recycle.
|
* `PickStrategy` takes care of selecting a proper strategy to deal with a texture recycle.
|
||||||
* @param overlaps, the overlapping surfaces registered in the cache.
|
* @param overlaps, the overlapping surfaces registered in the cache.
|
||||||
|
@ -691,6 +719,120 @@ private:
|
||||||
MatchTopologyResult::FullMatch);
|
MatchTopologyResult::FullMatch);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* `DeduceSurface` gets the starting address and parameters of a candidate surface and tries
|
||||||
|
* to find a matching surface within the cache that's similar to it. If there are many textures
|
||||||
|
* or the texture found if entirely incompatible, it will fail. If no texture is found, the
|
||||||
|
* blit will be unsuccessful.
|
||||||
|
* @param gpu_addr, the starting address of the candidate surface.
|
||||||
|
* @param params, the paremeters on the candidate surface.
|
||||||
|
**/
|
||||||
|
Deduction DeduceSurface(const GPUVAddr gpu_addr, const SurfaceParams& params) {
|
||||||
|
const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)};
|
||||||
|
const auto cache_addr{ToCacheAddr(host_ptr)};
|
||||||
|
|
||||||
|
if (!cache_addr) {
|
||||||
|
Deduction result{};
|
||||||
|
result.type = DeductionType::DeductionFailed;
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (const auto iter = l1_cache.find(cache_addr); iter != l1_cache.end()) {
|
||||||
|
TSurface& current_surface = iter->second;
|
||||||
|
const auto topological_result = current_surface->MatchesTopology(params);
|
||||||
|
if (topological_result != MatchTopologyResult::FullMatch) {
|
||||||
|
Deduction result{};
|
||||||
|
result.type = DeductionType::DeductionFailed;
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
const auto struct_result = current_surface->MatchesStructure(params);
|
||||||
|
if (struct_result != MatchStructureResult::None &&
|
||||||
|
current_surface->MatchTarget(params.target)) {
|
||||||
|
Deduction result{};
|
||||||
|
result.type = DeductionType::DeductionComplete;
|
||||||
|
result.surface = current_surface;
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const std::size_t candidate_size = params.GetGuestSizeInBytes();
|
||||||
|
auto overlaps{GetSurfacesInRegion(cache_addr, candidate_size)};
|
||||||
|
|
||||||
|
if (overlaps.empty()) {
|
||||||
|
Deduction result{};
|
||||||
|
result.type = DeductionType::DeductionIncomplete;
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (overlaps.size() > 1) {
|
||||||
|
Deduction result{};
|
||||||
|
result.type = DeductionType::DeductionFailed;
|
||||||
|
return result;
|
||||||
|
} else {
|
||||||
|
Deduction result{};
|
||||||
|
result.type = DeductionType::DeductionComplete;
|
||||||
|
result.surface = overlaps[0];
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* `DeduceBestBlit` gets the a source and destination starting address and parameters,
|
||||||
|
* and tries to deduce if they are supposed to be depth textures. If so, their
|
||||||
|
* parameters are modified and fixed into so.
|
||||||
|
* @param gpu_addr, the starting address of the candidate surface.
|
||||||
|
* @param params, the parameters on the candidate surface.
|
||||||
|
**/
|
||||||
|
void DeduceBestBlit(SurfaceParams& src_params, SurfaceParams& dst_params,
|
||||||
|
const GPUVAddr src_gpu_addr, const GPUVAddr dst_gpu_addr) {
|
||||||
|
auto deduced_src = DeduceSurface(src_gpu_addr, src_params);
|
||||||
|
auto deduced_dst = DeduceSurface(src_gpu_addr, src_params);
|
||||||
|
if (deduced_src.Failed() || deduced_dst.Failed()) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
const bool incomplete_src = deduced_src.Incomplete();
|
||||||
|
const bool incomplete_dst = deduced_dst.Incomplete();
|
||||||
|
|
||||||
|
if (incomplete_src && incomplete_dst) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
const bool any_incomplete = incomplete_src || incomplete_dst;
|
||||||
|
|
||||||
|
if (!any_incomplete) {
|
||||||
|
if (!(deduced_src.IsDepth() && deduced_dst.IsDepth())) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (incomplete_src && !(deduced_dst.IsDepth())) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (incomplete_dst && !(deduced_src.IsDepth())) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const auto inherit_format = ([](SurfaceParams& to, TSurface from) {
|
||||||
|
const SurfaceParams& params = from->GetSurfaceParams();
|
||||||
|
to.pixel_format = params.pixel_format;
|
||||||
|
to.component_type = params.component_type;
|
||||||
|
to.type = params.type;
|
||||||
|
});
|
||||||
|
// Now we got the cases where one or both is Depth and the other is not known
|
||||||
|
if (!incomplete_src) {
|
||||||
|
inherit_format(src_params, deduced_src.surface);
|
||||||
|
} else {
|
||||||
|
inherit_format(src_params, deduced_dst.surface);
|
||||||
|
}
|
||||||
|
if (!incomplete_dst) {
|
||||||
|
inherit_format(dst_params, deduced_dst.surface);
|
||||||
|
} else {
|
||||||
|
inherit_format(dst_params, deduced_src.surface);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
std::pair<TSurface, TView> InitializeSurface(GPUVAddr gpu_addr, const SurfaceParams& params,
|
std::pair<TSurface, TView> InitializeSurface(GPUVAddr gpu_addr, const SurfaceParams& params,
|
||||||
bool preserve_contents) {
|
bool preserve_contents) {
|
||||||
auto new_surface{GetUncachedSurface(gpu_addr, params)};
|
auto new_surface{GetUncachedSurface(gpu_addr, params)};
|
||||||
|
|
Loading…
Reference in a new issue