From 10682ad7e055391757686e91252dabe2832d58cd Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Tue, 12 Feb 2019 21:14:39 -0400
Subject: [PATCH] shader_decompiler: Improve Accuracy of Attribute
 Interpolation.

---
 src/video_core/engines/shader_bytecode.h      |  6 +--
 src/video_core/engines/shader_header.h        | 41 +++++++++++++++--
 .../renderer_opengl/gl_shader_decompiler.cpp  | 44 +++++++------------
 .../renderer_opengl/gl_shader_gen.cpp         |  4 +-
 src/video_core/shader/decode/memory.cpp       |  2 +-
 src/video_core/shader/decode/other.cpp        | 15 ++++++-
 6 files changed, 74 insertions(+), 38 deletions(-)

diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 1f425f90b..252592edd 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -376,9 +376,9 @@ enum class R2pMode : u64 {
 };
 
 enum class IpaInterpMode : u64 {
-    Linear = 0,
-    Perspective = 1,
-    Flat = 2,
+    Pass = 0,     // mode ShaderIR::DecodeMemory assigns to attribute loads
+    Multiply = 1, // ShaderIR::DecodeOther multiplies the IPA result by the gpr20 register
+    Constant = 2,
     Sc = 3,
 };
 
diff --git a/src/video_core/engines/shader_header.h b/src/video_core/engines/shader_header.h
index cf2b76ff6..e86a7f04a 100644
--- a/src/video_core/engines/shader_header.h
+++ b/src/video_core/engines/shader_header.h
@@ -16,6 +16,13 @@ enum class OutputTopology : u32 {
     TriangleStrip = 7,
 };
 
+enum class AttributeUse : u8 {
+    Unused = 0,       // skipped when per-component uses are merged
+    Constant = 1,     // declared as GLSL "flat"
+    Perspective = 2,  // GLSL default (smooth) interpolation, no qualifier emitted
+    ScreenLinear = 3, // declared as GLSL "noperspective"
+};
+
 // Documentation in:
 // http://download.nvidia.com/open-gpu-doc/Shader-Program-Header/1/Shader-Program-Header.html#ImapTexture
 struct Header {
@@ -84,9 +91,15 @@ struct Header {
         } vtg;
 
         struct {
-            INSERT_PADDING_BYTES(3);  // ImapSystemValuesA
-            INSERT_PADDING_BYTES(1);  // ImapSystemValuesB
-            INSERT_PADDING_BYTES(32); // ImapGenericVector[32]
+            INSERT_PADDING_BYTES(3); // ImapSystemValuesA
+            INSERT_PADDING_BYTES(1); // ImapSystemValuesB
+            union { // one byte per generic attribute: 2 bits per component, x in the low bits
+                BitField<0, 2, AttributeUse> x;
+                BitField<2, 2, AttributeUse> y;
+                BitField<4, 2, AttributeUse> z; // component 2, matches GetAttributeIndexUse
+                BitField<6, 2, AttributeUse> w; // component 3, matches GetAttributeIndexUse
+                u8 raw;
+            } imap_generic_vector[32]; // ImapGenericVector[32]
             INSERT_PADDING_BYTES(2);  // ImapColor
             INSERT_PADDING_BYTES(2);  // ImapSystemValuesC
             INSERT_PADDING_BYTES(10); // ImapFixedFncTexture[10]
@@ -103,6 +116,28 @@ struct Header {
                 const u32 bit = render_target * 4 + component;
                 return omap.target & (1 << bit);
             }
+            AttributeUse GetAttributeIndexUse(u32 attribute, u32 index) const {
+                return static_cast<AttributeUse>( // 2-bit use code of one component
+                    (imap_generic_vector[attribute].raw >> (index * 2)) & 0x03);
+            }
+            AttributeUse GetAttributeUse(u32 attribute) const { // merged use of all 4 components
+                AttributeUse result = AttributeUse::Unused;
+                for (u32 i = 0; i < 4; i++) { // one 2-bit entry per component
+                    const auto index = GetAttributeIndexUse(attribute, i); // holds a use value
+                    if (index == AttributeUse::Unused) {
+                        continue;
+                    }
+                    if (result == AttributeUse::Unused || result == index) {
+                        result = index;
+                        continue;
+                    }
+                    LOG_CRITICAL(HW_GPU, "Generic Attribute Conflict in Interpolation Mode");
+                    if (index == AttributeUse::Perspective) { // on conflict, Perspective wins
+                        result = index;
+                    }
+                }
+                return result;
+            }
         } ps;
     };
 
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index b39bb4843..f4140624e 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -20,6 +20,7 @@
 namespace OpenGL::GLShader {
 
 using Tegra::Shader::Attribute;
+using Tegra::Shader::AttributeUse;
 using Tegra::Shader::Header;
 using Tegra::Shader::IpaInterpMode;
 using Tegra::Shader::IpaMode;
@@ -288,34 +289,22 @@ private:
         code.AddNewLine();
     }
 
-    std::string GetInputFlags(const IpaMode& input_mode) {
-        const IpaSampleMode sample_mode = input_mode.sampling_mode;
-        const IpaInterpMode interp_mode = input_mode.interpolation_mode;
+    std::string GetInputFlags(AttributeUse attribute) { // GLSL interpolation qualifier prefix
         std::string out;
 
-        switch (interp_mode) {
-        case IpaInterpMode::Flat:
+        switch (attribute) {
+        case AttributeUse::Constant:
             out += "flat ";
             break;
-        case IpaInterpMode::Linear:
+        case AttributeUse::ScreenLinear:
             out += "noperspective ";
             break;
-        case IpaInterpMode::Perspective:
+        case AttributeUse::Perspective:
             // Default, Smooth
             break;
         default:
-            UNIMPLEMENTED_MSG("Unhandled IPA interp mode: {}", static_cast<u32>(interp_mode));
-        }
-        switch (sample_mode) {
-        case IpaSampleMode::Centroid:
-            // It can be implemented with the "centroid " keyword in GLSL
-            UNIMPLEMENTED_MSG("Unimplemented IPA sampler mode centroid");
-            break;
-        case IpaSampleMode::Default:
-            // Default, n/a
-            break;
-        default:
-            UNIMPLEMENTED_MSG("Unimplemented IPA sampler mode: {}", static_cast<u32>(sample_mode));
+            LOG_CRITICAL(HW_GPU, "Unused attribute being fetched");
+            UNREACHABLE(); // AttributeUse::Unused is the only enumerator not handled above
         }
         return out;
     }
@@ -324,16 +313,11 @@ private:
         const auto& attributes = ir.GetInputAttributes();
         for (const auto element : attributes) {
             const Attribute::Index index = element.first;
-            const IpaMode& input_mode = *element.second.begin();
             if (index < Attribute::Index::Attribute_0 || index > Attribute::Index::Attribute_31) {
                 // Skip when it's not a generic attribute
                 continue;
             }
 
-            ASSERT(element.second.size() > 0);
-            UNIMPLEMENTED_IF_MSG(element.second.size() > 1,
-                                 "Multiple input flag modes are not supported in GLSL");
-
             // TODO(bunnei): Use proper number of elements for these
             u32 idx = static_cast<u32>(index) - static_cast<u32>(Attribute::Index::Attribute_0);
             if (stage != ShaderStage::Vertex) {
@@ -345,8 +329,14 @@ private:
             if (stage == ShaderStage::Geometry) {
                 attr = "gs_" + attr + "[]";
             }
-            code.AddLine("layout (location = " + std::to_string(idx) + ") " +
-                         GetInputFlags(input_mode) + "in vec4 " + attr + ';');
+            std::string suffix;
+            if (stage == ShaderStage::Fragment) {
+                const auto input_mode =
+                    header.ps.GetAttributeUse(idx - GENERIC_VARYING_START_LOCATION);
+                suffix = GetInputFlags(input_mode);
+            }
+            code.AddLine("layout (location = " + std::to_string(idx) + ") " + suffix + "in vec4 " +
+                         attr + ';');
         }
         if (!attributes.empty())
             code.AddNewLine();
@@ -1571,4 +1561,4 @@ ProgramResult Decompile(const ShaderIR& ir, Maxwell::ShaderStage stage, const st
     return {decompiler.GetResult(), decompiler.GetShaderEntries()};
 }
 
-} // namespace OpenGL::GLShader
\ No newline at end of file
+} // namespace OpenGL::GLShader
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
index 04e1db911..7d96649af 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -124,7 +124,7 @@ layout (location = 5) out vec4 FragColor5;
 layout (location = 6) out vec4 FragColor6;
 layout (location = 7) out vec4 FragColor7;
 
-layout (location = 0) in vec4 position;
+layout (location = 0) in noperspective vec4 position;
 
 layout (std140, binding = EMULATION_UBO_BINDING) uniform fs_config {
     vec4 viewport_flip;
@@ -172,4 +172,4 @@ void main() {
     return {out, program.second};
 }
 
-} // namespace OpenGL::GLShader
\ No newline at end of file
+} // namespace OpenGL::GLShader
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp
index 523421794..9a1d1de94 100644
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -48,7 +48,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
         UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0,
                              "Unaligned attribute loads are not supported");
 
-        Tegra::Shader::IpaMode input_mode{Tegra::Shader::IpaInterpMode::Perspective,
+        Tegra::Shader::IpaMode input_mode{Tegra::Shader::IpaInterpMode::Pass,
                                           Tegra::Shader::IpaSampleMode::Default};
 
         u64 next_element = instr.attribute.fmt20.element;
diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp
index f9502e3d0..d750a2936 100644
--- a/src/video_core/shader/decode/other.cpp
+++ b/src/video_core/shader/decode/other.cpp
@@ -135,7 +135,18 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
                                                 instr.ipa.sample_mode.Value()};
 
         const Node attr = GetInputAttribute(attribute.index, attribute.element, input_mode);
-        const Node value = GetSaturatedFloat(attr, instr.ipa.saturate);
+        Node value = attr;
+        const Tegra::Shader::Attribute::Index index = attribute.index.Value();
+        if (index >= Tegra::Shader::Attribute::Index::Attribute_0 &&
+            index <= Tegra::Shader::Attribute::Index::Attribute_31) {
+            // TODO(Blinkhawk): There are cases where a perspective attribute uses PASS.
+            // In theory, by setting them as perspective, OpenGL does the perspective correction.
+            // A way must be figured out to reverse the last step of it.
+            if (input_mode.interpolation_mode == Tegra::Shader::IpaInterpMode::Multiply) {
+                value = Operation(OperationCode::FMul, PRECISE, value, GetRegister(instr.gpr20));
+            }
+        }
+        value = GetSaturatedFloat(value, instr.ipa.saturate);
 
         SetRegister(bb, instr.gpr0, value);
         break;
@@ -175,4 +186,4 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
     return pc;
 }
 
-} // namespace VideoCommon::Shader
\ No newline at end of file
+} // namespace VideoCommon::Shader