From 8584a77eb2e51e702d4c941e0fca07aafc47f29b Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Mon, 22 Jun 2020 19:07:04 -0300
Subject: [PATCH] vk_pipeline_cache: Avoid hashing and comparing dynamic state
 when possible

When extended dynamic state is supported, the dynamic state bytes don't
have to be collected into the pipeline key, hence we can avoid hashing
and comparing them on lookups.
---
 .../renderer_vulkan/fixed_pipeline_state.cpp  | 11 ++++---
 .../renderer_vulkan/fixed_pipeline_state.h    | 32 +++++++++++--------
 .../renderer_vulkan/vk_graphics_pipeline.cpp  | 18 +++++++++--
 .../renderer_vulkan/vk_pipeline_cache.cpp     |  7 ++--
 .../renderer_vulkan/vk_pipeline_cache.h       |  4 +++
 .../renderer_vulkan/vk_rasterizer.cpp         |  2 +-
 6 files changed, 51 insertions(+), 23 deletions(-)

diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
index 07358b0f9..d1f0ea932 100644
--- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
+++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
@@ -39,7 +39,7 @@ constexpr std::array POLYGON_OFFSET_ENABLE_LUT = {
 
 } // Anonymous namespace
 
-void FixedPipelineState::Fill(const Maxwell& regs) {
+void FixedPipelineState::Fill(const Maxwell& regs, bool has_extended_dynamic_state) {
     const auto& clip = regs.view_volume_clip_control;
     const std::array enabled_lut = {regs.polygon_offset_point_enable,
                                     regs.polygon_offset_line_enable,
@@ -86,7 +86,10 @@ void FixedPipelineState::Fill(const Maxwell& regs) {
     std::transform(transform.begin(), transform.end(), viewport_swizzles.begin(),
                    [](const auto& viewport) { return static_cast<u16>(viewport.swizzle.raw); });
 
-    dynamic_state.Fill(regs);
+    if (!has_extended_dynamic_state) {
+        no_extended_dynamic_state.Assign(1);
+        dynamic_state.Fill(regs);
+    }
 }
 
 void FixedPipelineState::BlendingAttachment::Fill(const Maxwell& regs, std::size_t index) {
@@ -173,12 +176,12 @@ void FixedPipelineState::DynamicState::Fill(const Maxwell& regs) {
 }
 
 std::size_t FixedPipelineState::Hash() const noexcept {
-    const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this);
+    const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), Size());
     return static_cast<std::size_t>(hash);
 }
 
 bool FixedPipelineState::operator==(const FixedPipelineState& rhs) const noexcept {
-    return std::memcmp(this, &rhs, sizeof *this) == 0;
+    return std::memcmp(this, &rhs, Size()) == 0;
 }
 
 u32 FixedPipelineState::PackComparisonOp(Maxwell::ComparisonOp op) noexcept {
diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h
index 09d05702d..cdcbb65f5 100644
--- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h
+++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h
@@ -177,18 +177,19 @@ struct FixedPipelineState {
 
     union {
         u32 raw;
-        BitField<0, 1, u32> primitive_restart_enable;
-        BitField<1, 1, u32> depth_bias_enable;
-        BitField<2, 1, u32> depth_clamp_disabled;
-        BitField<3, 1, u32> ndc_minus_one_to_one;
-        BitField<4, 2, u32> polygon_mode;
-        BitField<6, 5, u32> patch_control_points_minus_one;
-        BitField<11, 2, u32> tessellation_primitive;
-        BitField<13, 2, u32> tessellation_spacing;
-        BitField<15, 1, u32> tessellation_clockwise;
-        BitField<16, 1, u32> logic_op_enable;
-        BitField<17, 4, u32> logic_op;
-        BitField<21, 1, u32> rasterize_enable;
+        BitField<0, 1, u32> no_extended_dynamic_state;
+        BitField<2, 1, u32> primitive_restart_enable;
+        BitField<3, 1, u32> depth_bias_enable;
+        BitField<4, 1, u32> depth_clamp_disabled;
+        BitField<5, 1, u32> ndc_minus_one_to_one;
+        BitField<6, 2, u32> polygon_mode;
+        BitField<8, 5, u32> patch_control_points_minus_one;
+        BitField<13, 2, u32> tessellation_primitive;
+        BitField<15, 2, u32> tessellation_spacing;
+        BitField<17, 1, u32> tessellation_clockwise;
+        BitField<18, 1, u32> logic_op_enable;
+        BitField<19, 4, u32> logic_op;
+        BitField<23, 1, u32> rasterize_enable;
     };
     u32 point_size;
     std::array<u32, Maxwell::NumVertexArrays> binding_divisors;
@@ -197,7 +198,7 @@ struct FixedPipelineState {
     std::array<u16, Maxwell::NumViewports> viewport_swizzles;
     DynamicState dynamic_state;
 
-    void Fill(const Maxwell& regs);
+    void Fill(const Maxwell& regs, bool has_extended_dynamic_state);
 
     std::size_t Hash() const noexcept;
 
@@ -206,6 +207,11 @@ struct FixedPipelineState {
     bool operator!=(const FixedPipelineState& rhs) const noexcept {
         return !operator==(rhs);
     }
+
+    std::size_t Size() const noexcept {
+        const std::size_t total_size = sizeof *this;
+        return total_size - (no_extended_dynamic_state != 0 ? 0 : sizeof(DynamicState));
+    }
 };
 static_assert(std::has_unique_object_representations_v<FixedPipelineState>);
 static_assert(std::is_trivially_copyable_v<FixedPipelineState>);
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
index 15d9ac3b0..844445105 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
@@ -177,8 +177,22 @@ std::vector<vk::ShaderModule> VKGraphicsPipeline::CreateShaderModules(
 vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpass_params,
                                                 const SPIRVProgram& program) const {
     const auto& state = fixed_state;
-    const auto& dynamic = fixed_state.dynamic_state;
-    const auto& viewport_swizzles = fixed_state.viewport_swizzles;
+    const auto& viewport_swizzles = state.viewport_swizzles;
+
+    FixedPipelineState::DynamicState dynamic;
+    if (device.IsExtExtendedDynamicStateSupported()) {
+        // Insert dummy values. As long as they are valid their contents don't matter, since
+        // these fields are ignored when extended dynamic state is in use
+        dynamic.raw1 = 0;
+        dynamic.raw2 = 0;
+        for (FixedPipelineState::VertexBinding& binding : dynamic.vertex_bindings) {
+            // Enable all vertex bindings
+            binding.raw = 0;
+            binding.enabled.Assign(1);
+        }
+    } else {
+        dynamic = state.dynamic_state;
+    }
 
     std::vector<VkVertexInputBindingDescription> vertex_bindings;
     std::vector<VkVertexInputBindingDivisorDescriptionEXT> vertex_binding_divisors;
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index e684c17a6..3da835324 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -116,12 +116,12 @@ u32 FillDescriptorLayout(const ShaderEntries& entries,
 } // Anonymous namespace
 
 std::size_t GraphicsPipelineCacheKey::Hash() const noexcept {
-    const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this);
+    const u64 hash = Common::CityHash64(reinterpret_cast<const char*>(this), Size());
     return static_cast<std::size_t>(hash);
 }
 
 bool GraphicsPipelineCacheKey::operator==(const GraphicsPipelineCacheKey& rhs) const noexcept {
-    return std::memcmp(&rhs, this, sizeof *this) == 0;
+    return std::memcmp(&rhs, this, Size()) == 0;
 }
 
 std::size_t ComputePipelineCacheKey::Hash() const noexcept {
@@ -312,7 +312,8 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) {
     const auto& gpu = system.GPU().Maxwell3D();
 
     Specialization specialization;
-    if (fixed_state.dynamic_state.Topology() == Maxwell::PrimitiveTopology::Points) {
+    if (fixed_state.dynamic_state.Topology() == Maxwell::PrimitiveTopology::Points ||
+        device.IsExtExtendedDynamicStateSupported()) {
         float point_size;
         std::memcpy(&point_size, &fixed_state.point_size, sizeof(float));
         specialization.point_size = point_size;
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
index 7f6c56261..0a3fe65fb 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
@@ -56,6 +56,10 @@ struct GraphicsPipelineCacheKey {
     bool operator!=(const GraphicsPipelineCacheKey& rhs) const noexcept {
         return !operator==(rhs);
     }
+
+    std::size_t Size() const noexcept {
+        return sizeof(renderpass_params) + sizeof(padding) + sizeof(shaders) + fixed_state.Size();
+    }
 };
 static_assert(std::has_unique_object_representations_v<GraphicsPipelineCacheKey>);
 static_assert(std::is_trivially_copyable_v<GraphicsPipelineCacheKey>);
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 40d8df8c7..7625871c2 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -413,7 +413,7 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
 
     const auto& gpu = system.GPU().Maxwell3D();
     GraphicsPipelineCacheKey key;
-    key.fixed_state.Fill(gpu.regs);
+    key.fixed_state.Fill(gpu.regs, device.IsExtExtendedDynamicStateSupported());
 
     buffer_cache.Map(CalculateGraphicsStreamBufferSize(is_indexed));