From b62ffb612dbd672371d163e3b511e81f0c2282e6 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Tue, 27 Dec 2022 16:40:28 -0500
Subject: [PATCH] Vulkan: rework stencil tracking.

---
 src/video_core/engines/maxwell_3d.h           |   8 +-
 .../renderer_vulkan/vk_rasterizer.cpp         | 110 ++++++++++++++----
 .../renderer_vulkan/vk_state_tracker.cpp      |  26 +++--
 .../renderer_vulkan/vk_state_tracker.h        |  65 +++++++++++
 4 files changed, 171 insertions(+), 38 deletions(-)

diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index dbefcd715..a2dff0350 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -2711,7 +2711,7 @@ public:
                 u32 post_z_pixel_imask;                                                ///< 0x0F1C
                 INSERT_PADDING_BYTES_NOINIT(0x20);
                 ConstantColorRendering const_color_rendering;                          ///< 0x0F40
-                s32 stencil_back_ref;                                                  ///< 0x0F54
+                u32 stencil_back_ref;                                                  ///< 0x0F54
                 u32 stencil_back_mask;                                                 ///< 0x0F58
                 u32 stencil_back_func_mask;                                            ///< 0x0F5C
                 INSERT_PADDING_BYTES_NOINIT(0x14);
@@ -2835,9 +2835,9 @@ public:
                 Blend blend;                                                           ///< 0x133C
                 u32 stencil_enable;                                                    ///< 0x1380
                 StencilOp stencil_front_op;                                            ///< 0x1384
-                s32 stencil_front_ref;                                                 ///< 0x1394
-                s32 stencil_front_func_mask;                                           ///< 0x1398
-                s32 stencil_front_mask;                                                ///< 0x139C
+                u32 stencil_front_ref;                                                 ///< 0x1394
+                u32 stencil_front_func_mask;                                           ///< 0x1398
+                u32 stencil_front_mask;                                                ///< 0x139C
                 INSERT_PADDING_BYTES_NOINIT(0x4);
                 u32 draw_auto_start_byte_count;                                        ///< 0x13A4
                 PsSaturate frag_color_clamp;                                           ///< 0x13A8
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index da76b9a22..fc746fe2c 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -886,32 +886,70 @@ void RasterizerVulkan::UpdateStencilFaces(Tegra::Engines::Maxwell3D::Regs& regs)
     if (!state_tracker.TouchStencilProperties()) {
         return;
     }
-    if (regs.stencil_two_side_enable) {
-        // Separate values per face
-        scheduler.Record(
-            [front_ref = regs.stencil_front_ref, front_write_mask = regs.stencil_front_mask,
-             front_test_mask = regs.stencil_front_func_mask, back_ref = regs.stencil_back_ref,
-             back_write_mask = regs.stencil_back_mask,
-             back_test_mask = regs.stencil_back_func_mask](vk::CommandBuffer cmdbuf) {
-                // Front face
-                cmdbuf.SetStencilReference(VK_STENCIL_FACE_FRONT_BIT, front_ref);
-                cmdbuf.SetStencilWriteMask(VK_STENCIL_FACE_FRONT_BIT, front_write_mask);
-                cmdbuf.SetStencilCompareMask(VK_STENCIL_FACE_FRONT_BIT, front_test_mask);
-
-                // Back face
-                cmdbuf.SetStencilReference(VK_STENCIL_FACE_BACK_BIT, back_ref);
-                cmdbuf.SetStencilWriteMask(VK_STENCIL_FACE_BACK_BIT, back_write_mask);
-                cmdbuf.SetStencilCompareMask(VK_STENCIL_FACE_BACK_BIT, back_test_mask);
-            });
-    } else {
-        // Front face defines both faces
-        scheduler.Record([ref = regs.stencil_front_ref, write_mask = regs.stencil_front_mask,
-                          test_mask = regs.stencil_front_func_mask](vk::CommandBuffer cmdbuf) {
-            cmdbuf.SetStencilReference(VK_STENCIL_FACE_FRONT_AND_BACK, ref);
-            cmdbuf.SetStencilWriteMask(VK_STENCIL_FACE_FRONT_AND_BACK, write_mask);
-            cmdbuf.SetStencilCompareMask(VK_STENCIL_FACE_FRONT_AND_BACK, test_mask);
-        });
+    const bool two_sided = regs.stencil_two_side_enable != 0;
+    bool update_references = state_tracker.TouchStencilReference();
+    bool update_write_mask = state_tracker.TouchStencilWriteMask();
+    bool update_compare_masks = state_tracker.TouchStencilCompare();
+    if (state_tracker.TouchStencilSide(two_sided)) {
+        update_references = true;
+        update_write_mask = true;
+        update_compare_masks = true;
     }
+    // The tracker caches the values last recorded to the command buffer. Both faces are checked
+    // unconditionally so neither cached value goes stale, and with single-sided stencil the
+    // back face follows the front value because that value is applied to both faces.
+    if (update_references) {
+        const u32 front_ref = regs.stencil_front_ref;
+        const u32 back_ref = two_sided ? regs.stencil_back_ref : front_ref;
+        const bool front_changed = state_tracker.CheckStencilReferenceFront(front_ref);
+        const bool back_changed = state_tracker.CheckStencilReferenceBack(back_ref);
+        if (front_changed || back_changed) {
+            scheduler.Record([front_ref, back_ref](vk::CommandBuffer cmdbuf) {
+                const bool set_back = front_ref != back_ref;
+                cmdbuf.SetStencilReference(set_back ? VK_STENCIL_FACE_FRONT_BIT
+                                                    : VK_STENCIL_FACE_FRONT_AND_BACK,
+                                           front_ref);
+                if (set_back) {
+                    cmdbuf.SetStencilReference(VK_STENCIL_FACE_BACK_BIT, back_ref);
+                }
+            });
+        }
+    }
+    if (update_write_mask) {
+        const u32 front_mask = regs.stencil_front_mask;
+        const u32 back_mask = two_sided ? regs.stencil_back_mask : front_mask;
+        const bool front_changed = state_tracker.CheckStencilWriteMaskFront(front_mask);
+        const bool back_changed = state_tracker.CheckStencilWriteMaskBack(back_mask);
+        if (front_changed || back_changed) {
+            scheduler.Record([front_mask, back_mask](vk::CommandBuffer cmdbuf) {
+                const bool set_back = front_mask != back_mask;
+                cmdbuf.SetStencilWriteMask(set_back ? VK_STENCIL_FACE_FRONT_BIT
+                                                    : VK_STENCIL_FACE_FRONT_AND_BACK,
+                                           front_mask);
+                if (set_back) {
+                    cmdbuf.SetStencilWriteMask(VK_STENCIL_FACE_BACK_BIT, back_mask);
+                }
+            });
+        }
+    }
+    if (update_compare_masks) {
+        const u32 front_mask = regs.stencil_front_func_mask;
+        const u32 back_mask = two_sided ? regs.stencil_back_func_mask : front_mask;
+        const bool front_changed = state_tracker.CheckStencilCompareMaskFront(front_mask);
+        const bool back_changed = state_tracker.CheckStencilCompareMaskBack(back_mask);
+        if (front_changed || back_changed) {
+            scheduler.Record([front_mask, back_mask](vk::CommandBuffer cmdbuf) {
+                const bool set_back = front_mask != back_mask;
+                cmdbuf.SetStencilCompareMask(set_back ? VK_STENCIL_FACE_FRONT_BIT
+                                                      : VK_STENCIL_FACE_FRONT_AND_BACK,
+                                             front_mask);
+                if (set_back) {
+                    cmdbuf.SetStencilCompareMask(VK_STENCIL_FACE_BACK_BIT, back_mask);
+                }
+            });
+        }
+    }
+    state_tracker.ClearStencilReset();
 }
 
 void RasterizerVulkan::UpdateLineWidth(Tegra::Engines::Maxwell3D::Regs& regs) {
diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.cpp b/src/video_core/renderer_vulkan/vk_state_tracker.cpp
index bfea503de..e5cf97472 100644
--- a/src/video_core/renderer_vulkan/vk_state_tracker.cpp
+++ b/src/video_core/renderer_vulkan/vk_state_tracker.cpp
@@ -33,6 +33,9 @@ Flags MakeInvalidationFlags() {
         BlendConstants,
         DepthBounds,
         StencilProperties,
+        StencilReference,
+        StencilWriteMask,
+        StencilCompare,
         LineWidth,
         CullMode,
         DepthBoundsEnable,
@@ -99,14 +102,17 @@ void SetupDirtyDepthBounds(Tables& tables) {
 }
 
 void SetupDirtyStencilProperties(Tables& tables) {
-    auto& table = tables[0];
-    table[OFF(stencil_two_side_enable)] = StencilProperties;
-    table[OFF(stencil_front_ref)] = StencilProperties;
-    table[OFF(stencil_front_mask)] = StencilProperties;
-    table[OFF(stencil_front_func_mask)] = StencilProperties;
-    table[OFF(stencil_back_ref)] = StencilProperties;
-    table[OFF(stencil_back_mask)] = StencilProperties;
-    table[OFF(stencil_back_func_mask)] = StencilProperties;
+    const auto mark = [&tables](size_t offset, u8 specific_flag) {
+        tables[0][offset] = specific_flag;     // flag for the individual register group
+        tables[1][offset] = StencilProperties; // flag shared by every stencil register
+    };
+    mark(OFF(stencil_front_ref), StencilReference);
+    mark(OFF(stencil_back_ref), StencilReference);
+    mark(OFF(stencil_front_mask), StencilWriteMask);
+    mark(OFF(stencil_back_mask), StencilWriteMask);
+    mark(OFF(stencil_front_func_mask), StencilCompare);
+    mark(OFF(stencil_back_func_mask), StencilCompare);
+    tables[0][OFF(stencil_two_side_enable)] = StencilProperties;
 }
 
 void SetupDirtyLineWidth(Tables& tables) {
@@ -238,6 +244,9 @@ void StateTracker::ChangeChannel(Tegra::Control::ChannelState& channel_state) {
 
 void StateTracker::InvalidateState() {
     flags->set();
+    current_topology = INVALID_TOPOLOGY;
+    // Make the next stencil update re-record every value regardless of the cached ones.
+    stencil_reset = true;
 }
 
 StateTracker::StateTracker()
diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.h b/src/video_core/renderer_vulkan/vk_state_tracker.h
index 7cdc70c60..8010ad26c 100644
--- a/src/video_core/renderer_vulkan/vk_state_tracker.h
+++ b/src/video_core/renderer_vulkan/vk_state_tracker.h
@@ -35,6 +35,9 @@ enum : u8 {
     BlendConstants,
     DepthBounds,
     StencilProperties,
+    StencilReference,
+    StencilWriteMask,
+    StencilCompare,
     LineWidth,
 
     CullMode,
@@ -74,6 +77,7 @@ public:
     void InvalidateCommandBufferState() {
         (*flags) |= invalidation_flags;
         current_topology = INVALID_TOPOLOGY;
+        stencil_reset = true;
     }
 
     void InvalidateViewports() {
@@ -113,6 +117,65 @@ public:
         return Exchange(Dirty::StencilProperties, false);
     }
 
+    /// Exchanges the StencilReference dirty flag with false, returning Exchange's result.
+    bool TouchStencilReference() const {
+        return Exchange(Dirty::StencilReference, false);
+    }
+
+    /// Exchanges the StencilWriteMask dirty flag with false, returning Exchange's result.
+    bool TouchStencilWriteMask() const {
+        return Exchange(Dirty::StencilWriteMask, false);
+    }
+
+    /// Exchanges the StencilCompare dirty flag with false, returning Exchange's result.
+    bool TouchStencilCompare() const {
+        return Exchange(Dirty::StencilCompare, false);
+    }
+
+    /// Stores new_value into old_value and returns true when the two values differed.
+    template <typename T>
+    static bool ExchangeCheck(T& old_value, T new_value) {
+        const bool changed = old_value != new_value;
+        old_value = new_value;
+        return changed;
+    }
+
+    /// Caches the two-sided mode; true when it changed or a stencil reset is pending.
+    bool TouchStencilSide(bool two_sided_stencil_new) {
+        return ExchangeCheck(two_sided_stencil, two_sided_stencil_new) || stencil_reset;
+    }
+
+    // Each Check* helper below caches new_value for one face and returns true when it differs
+    // from the previously cached value or while a stencil reset is pending.
+    bool CheckStencilReferenceFront(u32 new_value) {
+        return ExchangeCheck(front.ref, new_value) || stencil_reset;
+    }
+
+    bool CheckStencilReferenceBack(u32 new_value) {
+        return ExchangeCheck(back.ref, new_value) || stencil_reset;
+    }
+
+    bool CheckStencilWriteMaskFront(u32 new_value) {
+        return ExchangeCheck(front.write_mask, new_value) || stencil_reset;
+    }
+
+    bool CheckStencilWriteMaskBack(u32 new_value) {
+        return ExchangeCheck(back.write_mask, new_value) || stencil_reset;
+    }
+
+    bool CheckStencilCompareMaskFront(u32 new_value) {
+        return ExchangeCheck(front.compare_mask, new_value) || stencil_reset;
+    }
+
+    bool CheckStencilCompareMaskBack(u32 new_value) {
+        return ExchangeCheck(back.compare_mask, new_value) || stencil_reset;
+    }
+
+    /// Ends the pending stencil reset, so the Check* helpers compare against the cache again.
+    void ClearStencilReset() {
+        stencil_reset = false;
+    }
+
     bool TouchLineWidth() const {
         return Exchange(Dirty::LineWidth, false);
     }
@@ -214,10 +277,22 @@ private:
         return is_dirty;
     }
 
+    /// Stencil values of one face as last passed to the Check* helpers.
+    struct StencilProperties {
+        u32 ref = 0;
+        u32 write_mask = 0;
+        u32 compare_mask = 0;
+    };
+
     Tegra::Engines::Maxwell3D::DirtyState::Flags* flags;
     Tegra::Engines::Maxwell3D::DirtyState::Flags default_flags;
     Tegra::Engines::Maxwell3D::DirtyState::Flags invalidation_flags;
     Maxwell::PrimitiveTopology current_topology = INVALID_TOPOLOGY;
+    bool two_sided_stencil = false;
+    StencilProperties front{};
+    StencilProperties back{};
+    /// Set on invalidation; makes the stencil checks report a change until cleared.
+    bool stencil_reset = false;
 };
 
 } // namespace Vulkan