From f7ec078592468fa22ff377b996a720c8be82c2dc Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Sun, 29 Dec 2019 02:03:05 -0300
Subject: [PATCH] gl_state_tracker: Implement dirty flags for clip distances
 and shaders

---
 .../renderer_opengl/gl_rasterizer.cpp         | 28 ++++++++++++-------
 .../renderer_opengl/gl_rasterizer.h           |  5 ++--
 .../renderer_opengl/gl_shader_cache.cpp       |  5 ++++
 .../renderer_opengl/gl_shader_decompiler.cpp  |  5 +++-
 .../renderer_opengl/gl_shader_decompiler.h    |  2 +-
 .../renderer_opengl/gl_state_tracker.cpp      | 11 ++++++++
 .../renderer_opengl/gl_state_tracker.h        |  1 +
 7 files changed, 43 insertions(+), 14 deletions(-)

diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index bb89985cc..717f127e9 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -232,8 +232,7 @@ GLintptr RasterizerOpenGL::SetupIndexBuffer() {
 void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
     MICROPROFILE_SCOPE(OpenGL_Shader);
     auto& gpu = system.GPU().Maxwell3D();
-
-    std::array<bool, Maxwell::NumClipDistances> clip_distances{};
+    u32 clip_distances = 0;
 
     for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
         const auto& shader_config = gpu.regs.shader_config[index];
@@ -294,9 +293,7 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
         // When a clip distance is enabled but not set in the shader it crops parts of the screen
         // (sometimes it's half the screen, sometimes three quarters). To avoid this, enable the
         // clip distances only when it's written by a shader stage.
-        for (std::size_t i = 0; i < Maxwell::NumClipDistances; ++i) {
-            clip_distances[i] = clip_distances[i] || shader->GetShaderEntries().clip_distances[i];
-        }
+        clip_distances |= shader->GetShaderEntries().clip_distances;
 
         // When VertexA is enabled, we have dual vertex shaders
         if (program == Maxwell::ShaderProgram::VertexA) {
@@ -306,6 +303,7 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
     }
 
     SyncClipEnabled(clip_distances);
+    gpu.dirty.flags[Dirty::Shaders] = false;
 }
 
 std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const {
@@ -972,12 +970,22 @@ void RasterizerOpenGL::SyncDepthClamp() {
     oglEnable(GL_DEPTH_CLAMP, state.depth_clamp_far || state.depth_clamp_near);
 }
 
-void RasterizerOpenGL::SyncClipEnabled(
-    const std::array<bool, Maxwell::Regs::NumClipDistances>& clip_mask) {
-    const auto& regs = system.GPU().Maxwell3D().regs;
+void RasterizerOpenGL::SyncClipEnabled(u32 clip_mask) { // bit i = clip distance i
+    auto& gpu = system.GPU().Maxwell3D();
+    auto& flags = gpu.dirty.flags;
+    if (!flags[Dirty::ClipDistances] && !flags[Dirty::Shaders]) { // neither input is dirty
+        return;
+    }
+    flags[Dirty::ClipDistances] = false; // Dirty::Shaders is not cleared here
+
+    clip_mask &= gpu.regs.clip_distance_enabled; // keep only bits also set in the register
+    if (clip_mask == last_clip_distance_mask) { // same mask as the one applied last time
+        return;
+    }
+    last_clip_distance_mask = clip_mask; // remembered for the comparison above
+
     for (std::size_t i = 0; i < Maxwell::Regs::NumClipDistances; ++i) {
-        oglEnable(static_cast<GLenum>(GL_CLIP_DISTANCE0 + i),
-                  clip_mask[i] && ((regs.clip_distance_enabled >> i) & 1));
+        oglEnable(static_cast<GLenum>(GL_CLIP_DISTANCE0 + i), (clip_mask >> i) & 1);
     }
 }
 
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 22a3a3352..11206f557 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -137,8 +137,7 @@ private:
     void SyncDepthClamp();
 
     /// Syncs the clip enabled status to match the guest state
-    void SyncClipEnabled(
-        const std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances>& clip_mask);
+    void SyncClipEnabled(u32 clip_mask);
 
     /// Syncs the clip coefficients to match the guest state
     void SyncClipCoef();
@@ -230,6 +229,8 @@ private:
 
     /// Number of commands queued to the OpenGL driver. Reseted on flush.
     std::size_t num_queued_commands = 0;
+
+    u32 last_clip_distance_mask = 0;
 };
 
 } // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index bef141f63..4cb89db8c 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -22,6 +22,7 @@
 #include "video_core/renderer_opengl/gl_shader_cache.h"
 #include "video_core/renderer_opengl/gl_shader_decompiler.h"
 #include "video_core/renderer_opengl/gl_shader_disk_cache.h"
+#include "video_core/renderer_opengl/gl_state_tracker.h"
 #include "video_core/renderer_opengl/utils.h"
 #include "video_core/shader/shader_ir.h"
 
@@ -623,6 +624,10 @@ bool ShaderCacheOpenGL::GenerateUnspecializedShaders(
 }
 
 Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
+    if (!system.GPU().Maxwell3D().dirty.flags[Dirty::Shaders]) {
+        return last_shaders[static_cast<std::size_t>(program)];
+    }
+
     auto& memory_manager{system.GPU().MemoryManager()};
     const GPUVAddr address{GetShaderAddress(system, program)};
 
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 4735000b5..3a41ed30c 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -2547,7 +2547,10 @@ ShaderEntries GetEntries(const VideoCommon::Shader::ShaderIR& ir) {
     for (const auto& image : ir.GetImages()) {
         entries.images.emplace_back(image);
     }
-    entries.clip_distances = ir.GetClipDistances();
+    const auto clip_distances = ir.GetClipDistances(); // per-index flags, packed below
+    for (std::size_t i = 0; i < std::size(clip_distances); ++i) {
+        entries.clip_distances |= (clip_distances[i] ? 1U : 0U) << i; // accumulate bit i
+    }
     entries.shader_length = ir.GetLength();
     return entries;
 }
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h
index 7876f48d6..0f692c1db 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.h
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h
@@ -74,7 +74,7 @@ struct ShaderEntries {
     std::vector<GlobalMemoryEntry> global_memory_entries;
     std::vector<SamplerEntry> samplers;
     std::vector<ImageEntry> images;
-    std::array<bool, Maxwell::NumClipDistances> clip_distances{};
+    u32 clip_distances{};
     std::size_t shader_length{};
 };
 
diff --git a/src/video_core/renderer_opengl/gl_state_tracker.cpp b/src/video_core/renderer_opengl/gl_state_tracker.cpp
index 7150b9247..bc5942a7f 100644
--- a/src/video_core/renderer_opengl/gl_state_tracker.cpp
+++ b/src/video_core/renderer_opengl/gl_state_tracker.cpp
@@ -124,6 +124,15 @@ void SetupDirtyScissors(Tables& tables) {
     FillBlock(tables[1], OFF(scissor_test), NUM(scissor_test), Scissors);
 }
 
+void SetupDirtyShaders(Tables& tables) { // shader_config[*] -> Shaders flag
+    FillBlock(tables[0], OFF(shader_config[0]), NUM(shader_config[0]) * Regs::MaxShaderProgram,
+              Shaders); // count = NUM of one entry times MaxShaderProgram entries
+}
+
+void SetupDirtyMisc(Tables& tables) { // clip_distance_enabled -> ClipDistances flag
+    tables[0][OFF(clip_distance_enabled)] = ClipDistances; // one table entry, so no FillBlock
+}
+
 } // Anonymous namespace
 
 StateTracker::StateTracker(Core::System& system) : system{system} {}
@@ -137,6 +146,8 @@ void StateTracker::Initialize() {
     SetupDirtyScissors(tables);
     SetupDirtyVertexArrays(tables);
     SetupDirtyVertexFormat(tables);
+    SetupDirtyShaders(tables);
+    SetupDirtyMisc(tables);
 
     auto& store = dirty.on_write_stores;
     store[RenderTargets] = true;
diff --git a/src/video_core/renderer_opengl/gl_state_tracker.h b/src/video_core/renderer_opengl/gl_state_tracker.h
index 85667cee1..11fdc6de4 100644
--- a/src/video_core/renderer_opengl/gl_state_tracker.h
+++ b/src/video_core/renderer_opengl/gl_state_tracker.h
@@ -48,6 +48,7 @@ enum : u8 {
     ColorMask7 = ColorMask0 + 7,
 
     Shaders,
+    ClipDistances,
     CullTestEnable,
     FrontFace,
     CullFace,