From 46177901b8d76f16223ac2716abcd6393faae25a Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Thu, 23 May 2019 20:19:56 -0300
Subject: [PATCH 1/2] gl_device: Add test to detect broken component indexing

Component indexing on AMD's proprietary driver is broken. This commit adds
a test to detect when we are on a driver that can't successfully manage
component indexing.

It dispatches a dummy draw with a single vertex shader that writes one
dynamically indexed component of a uniform buffer to an SSBO. The CPU then
reads that value back and compares it with the expected output.
---
 src/video_core/renderer_opengl/gl_device.cpp | 54 ++++++++++++++++++++
 src/video_core/renderer_opengl/gl_device.h   |  6 +++
 2 files changed, 60 insertions(+)

diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index 38497678a..5f86574cd 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -2,11 +2,14 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#include <array>
 #include <cstddef>
 #include <glad/glad.h>
 
 #include "common/logging/log.h"
+#include "common/scope_exit.h"
 #include "video_core/renderer_opengl/gl_device.h"
+#include "video_core/renderer_opengl/gl_resource_manager.h"
 
 namespace OpenGL {
 
@@ -24,6 +27,7 @@ Device::Device() {
     max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS);
     max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS);
     has_variable_aoffi = TestVariableAoffi();
+    has_component_indexing_bug = TestComponentIndexingBug();
 }
 
 Device::Device(std::nullptr_t) {
@@ -31,6 +35,7 @@ Device::Device(std::nullptr_t) {
     max_vertex_attributes = 16;
     max_varyings = 15;
     has_variable_aoffi = true;
+    has_component_indexing_bug = false;
 }
 
 bool Device::TestVariableAoffi() {
@@ -51,4 +56,53 @@ void main() {
     return supported;
 }
 
+// Draws one point with a test shader; returns true if dynamic component indexing reads bad data.
+bool Device::TestComponentIndexingBug() {
+    const GLchar* COMPONENT_TEST = R"(#version 430 core
+layout (std430, binding = 0) buffer OutputBuffer {
+    uint output_value;
+};
+layout (std140, binding = 0) uniform InputBuffer {
+    uvec4 input_value[4096];
+};
+layout (location = 0) uniform uint idx;
+void main() {
+    output_value = input_value[idx >> 2][idx & 3];
+})";
+    const GLuint shader{glCreateShaderProgramv(GL_VERTEX_SHADER, 1, &COMPONENT_TEST)};
+    SCOPE_EXIT({ glDeleteProgram(shader); });
+    glUseProgram(shader);
+
+    OGLVertexArray vao;
+    vao.Create();
+    glBindVertexArray(vao.handle);
+
+    // Words 0-3 are filler; the loop below only requests indices 4-7 (the second uvec4).
+    constexpr std::array<GLuint, 8> values{0, 0, 0, 0, 0x1236327, 0x985482, 0x872753, 0x2378432};
+    OGLBuffer ubo;
+    ubo.Create();
+    glNamedBufferData(ubo.handle, sizeof(values), values.data(), GL_STATIC_DRAW);
+    glBindBufferBase(GL_UNIFORM_BUFFER, 0, ubo.handle);
+
+    // One uint of storage that the shader writes and the CPU reads back.
+    OGLBuffer ssbo;
+    ssbo.Create();
+    glNamedBufferStorage(ssbo.handle, sizeof(GLuint), nullptr, GL_CLIENT_STORAGE_BIT);
+
+    bool has_bug = false;
+    for (GLuint index = 4; index < 8 && !has_bug; ++index) {
+        glInvalidateBufferData(ssbo.handle);
+        glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, ssbo.handle);
+        glProgramUniform1ui(shader, 0, index);
+        glDrawArrays(GL_POINTS, 0, 1);
+        // Make the shader's SSBO write visible to the buffer readback below.
+        glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT);
+        GLuint result{};
+        glGetNamedBufferSubData(ssbo.handle, 0, sizeof(result), &result);
+        has_bug = result != values.at(index);
+    }
+    LOG_INFO(Render_OpenGL, "Renderer_ComponentIndexingBug: {}", has_bug);
+    return has_bug;
+}
+
 } // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index de8490682..8c8c93760 100644
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -30,13 +30,19 @@ public:
         return has_variable_aoffi;
     }
 
+    bool HasComponentIndexingBug() const {
+        return has_component_indexing_bug; // Set by the Device constructors.
+    }
+
 private:
     static bool TestVariableAoffi();
+    static bool TestComponentIndexingBug();
 
     std::size_t uniform_buffer_alignment{};
     u32 max_vertex_attributes{};
     u32 max_varyings{};
     bool has_variable_aoffi{};
+    bool has_component_indexing_bug{};
 };
 
 } // namespace OpenGL

From d8827b07b5c823903609e00b8f0e1c27fa34fb91 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Thu, 23 May 2019 20:27:54 -0300
Subject: [PATCH 2/2] gl_shader_decompiler: Use an if based cbuf indexing for
 broken drivers

The following code is broken on AMD's proprietary GLSL compiler:
```glsl
uint idx = ...;
vec4 values = ...;
float some_value = values[idx & 3];
```

It indexes the wrong components. To work around this, the following
pessimized code is emitted when that bug is present:
```glsl
uint idx = ...;
vec4 values = ...;
float some_value;
if ((idx & 3) == 0) some_value = values.x;
if ((idx & 3) == 1) some_value = values.y;
if ((idx & 3) == 2) some_value = values.z;
if ((idx & 3) == 3) some_value = values.w;
```
---
 .../renderer_opengl/gl_shader_decompiler.cpp  | 23 ++++++++++++++++---
 1 file changed, 20 insertions(+), 3 deletions(-)

diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 6d4658c8b..4a40e4843 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -577,9 +577,26 @@ private:
             if (std::holds_alternative<OperationNode>(*offset)) {
                 // Indirect access
                 const std::string final_offset = code.GenerateTemporary();
-                code.AddLine("uint {} = (ftou({}) / 4);", final_offset, Visit(offset));
-                return fmt::format("{}[{} / 4][{} % 4]", GetConstBuffer(cbuf->GetIndex()),
-                                   final_offset, final_offset);
+                code.AddLine("uint {} = ftou({}) >> 2;", final_offset, Visit(offset));
+
+                if (!device.HasComponentIndexingBug()) {
+                    return fmt::format("{}[{} >> 2][{} & 3]", GetConstBuffer(cbuf->GetIndex()),
+                                       final_offset, final_offset);
+                }
+
+                // AMD's proprietary GLSL compiler miscompiles variable component access.
+                // To work around this driver bug, generate 4 ifs, one per component.
+                const std::string pack = code.GenerateTemporary();
+                code.AddLine("vec4 {} = {}[{} >> 2];", pack, GetConstBuffer(cbuf->GetIndex()),
+                             final_offset);
+
+                const std::string result = code.GenerateTemporary();
+                code.AddLine("float {};", result);
+                for (u32 swizzle = 0; swizzle < 4; ++swizzle) {
+                    code.AddLine("if (({} & 3) == {}) {} = {}{};", final_offset, swizzle, result,
+                                 pack, GetSwizzle(swizzle));
+                }
+                return result;
             }
 
             UNREACHABLE_MSG("Unmanaged offset node type");