From ddd82ef42b7bb7bea4c80edeb96bca8512580df3 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Fri, 24 Apr 2020 01:44:14 -0300
Subject: [PATCH] shader/memory_util: Deduplicate code

Deduplicate code shared between vk_pipeline_cache and gl_shader_cache as
well as shader decoder code.

While we are at it, fix a bug in gl_shader_cache where compute shaders
had an start offset of a stage shader.
---
 src/video_core/CMakeLists.txt                 |  2 +
 .../renderer_opengl/gl_shader_cache.cpp       | 82 +++----------------
 .../renderer_vulkan/vk_pipeline_cache.cpp     | 69 +++-------------
 .../renderer_vulkan/vk_pipeline_cache.h       |  8 +-
 src/video_core/shader/control_flow.cpp        | 12 +--
 src/video_core/shader/decode.cpp              | 12 +--
 src/video_core/shader/memory_util.cpp         | 77 +++++++++++++++++
 src/video_core/shader/memory_util.h           | 47 +++++++++++
 src/video_core/shader/shader_ir.h             |  3 +-
 9 files changed, 153 insertions(+), 159 deletions(-)
 create mode 100644 src/video_core/shader/memory_util.cpp
 create mode 100644 src/video_core/shader/memory_util.h

diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 8ede4ba9b..ff53282c9 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -124,6 +124,8 @@ add_library(video_core STATIC
     shader/decode.cpp
     shader/expr.cpp
     shader/expr.h
+    shader/memory_util.cpp
+    shader/memory_util.h
     shader/node_helper.cpp
     shader/node_helper.h
     shader/node.h
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index f63156b8d..9759a7078 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -10,8 +10,6 @@
 #include <thread>
 #include <unordered_set>
 
-#include <boost/functional/hash.hpp>
-
 #include "common/alignment.h"
 #include "common/assert.h"
 #include "common/logging/log.h"
@@ -28,76 +26,26 @@
 #include "video_core/renderer_opengl/gl_shader_disk_cache.h"
 #include "video_core/renderer_opengl/gl_state_tracker.h"
 #include "video_core/renderer_opengl/utils.h"
+#include "video_core/shader/memory_util.h"
 #include "video_core/shader/registry.h"
 #include "video_core/shader/shader_ir.h"
 
 namespace OpenGL {
 
 using Tegra::Engines::ShaderType;
+using VideoCommon::Shader::GetShaderAddress;
+using VideoCommon::Shader::GetShaderCode;
+using VideoCommon::Shader::GetUniqueIdentifier;
+using VideoCommon::Shader::KERNEL_MAIN_OFFSET;
 using VideoCommon::Shader::ProgramCode;
 using VideoCommon::Shader::Registry;
 using VideoCommon::Shader::ShaderIR;
+using VideoCommon::Shader::STAGE_MAIN_OFFSET;
 
 namespace {
 
-constexpr u32 STAGE_MAIN_OFFSET = 10;
-constexpr u32 KERNEL_MAIN_OFFSET = 0;
-
 constexpr VideoCommon::Shader::CompilerSettings COMPILER_SETTINGS{};
 
-/// Gets the address for the specified shader stage program
-GPUVAddr GetShaderAddress(Core::System& system, Maxwell::ShaderProgram program) {
-    const auto& gpu{system.GPU().Maxwell3D()};
-    const auto& shader_config{gpu.regs.shader_config[static_cast<std::size_t>(program)]};
-    return gpu.regs.code_address.CodeAddress() + shader_config.offset;
-}
-
-/// Gets if the current instruction offset is a scheduler instruction
-constexpr bool IsSchedInstruction(std::size_t offset, std::size_t main_offset) {
-    // Sched instructions appear once every 4 instructions.
-    constexpr std::size_t SchedPeriod = 4;
-    const std::size_t absolute_offset = offset - main_offset;
-    return (absolute_offset % SchedPeriod) == 0;
-}
-
-/// Calculates the size of a program stream
-std::size_t CalculateProgramSize(const ProgramCode& program) {
-    constexpr std::size_t start_offset = 10;
-    // This is the encoded version of BRA that jumps to itself. All Nvidia
-    // shaders end with one.
-    constexpr u64 self_jumping_branch = 0xE2400FFFFF07000FULL;
-    constexpr u64 mask = 0xFFFFFFFFFF7FFFFFULL;
-    std::size_t offset = start_offset;
-    while (offset < program.size()) {
-        const u64 instruction = program[offset];
-        if (!IsSchedInstruction(offset, start_offset)) {
-            if ((instruction & mask) == self_jumping_branch) {
-                // End on Maxwell's "nop" instruction
-                break;
-            }
-            if (instruction == 0) {
-                break;
-            }
-        }
-        offset++;
-    }
-    // The last instruction is included in the program size
-    return std::min(offset + 1, program.size());
-}
-
-/// Gets the shader program code from memory for the specified address
-ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, const GPUVAddr gpu_addr,
-                          const u8* host_ptr) {
-    ProgramCode code(VideoCommon::Shader::MAX_PROGRAM_LENGTH);
-    ASSERT_OR_EXECUTE(host_ptr != nullptr, {
-        std::fill(code.begin(), code.end(), 0);
-        return code;
-    });
-    memory_manager.ReadBlockUnsafe(gpu_addr, code.data(), code.size() * sizeof(u64));
-    code.resize(CalculateProgramSize(code));
-    return code;
-}
-
 /// Gets the shader type from a Maxwell program type
 constexpr GLenum GetGLShaderType(ShaderType shader_type) {
     switch (shader_type) {
@@ -114,17 +62,6 @@ constexpr GLenum GetGLShaderType(ShaderType shader_type) {
     }
 }
 
-/// Hashes one (or two) program streams
-u64 GetUniqueIdentifier(ShaderType shader_type, bool is_a, const ProgramCode& code,
-                        const ProgramCode& code_b = {}) {
-    u64 unique_identifier = boost::hash_value(code);
-    if (is_a) {
-        // VertexA programs include two programs
-        boost::hash_combine(unique_identifier, boost::hash_value(code_b));
-    }
-    return unique_identifier;
-}
-
 constexpr const char* GetShaderTypeName(ShaderType shader_type) {
     switch (shader_type) {
     case ShaderType::Vertex:
@@ -456,11 +393,12 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
     const auto host_ptr{memory_manager.GetPointer(address)};
 
     // No shader found - create a new one
-    ProgramCode code{GetShaderCode(memory_manager, address, host_ptr)};
+    ProgramCode code{GetShaderCode(memory_manager, address, host_ptr, false)};
     ProgramCode code_b;
     if (program == Maxwell::ShaderProgram::VertexA) {
         const GPUVAddr address_b{GetShaderAddress(system, Maxwell::ShaderProgram::VertexB)};
-        code_b = GetShaderCode(memory_manager, address_b, memory_manager.GetPointer(address_b));
+        const u8* host_ptr_b = memory_manager.GetPointer(address_b);
+        code_b = GetShaderCode(memory_manager, address_b, host_ptr_b, false);
     }
 
     const auto unique_identifier = GetUniqueIdentifier(
@@ -498,7 +436,7 @@ Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) {
 
     const auto host_ptr{memory_manager.GetPointer(code_addr)};
     // No kernel found, create a new one
-    auto code{GetShaderCode(memory_manager, code_addr, host_ptr)};
+    auto code{GetShaderCode(memory_manager, code_addr, host_ptr, true)};
     const auto unique_identifier{GetUniqueIdentifier(ShaderType::Compute, false, code)};
 
     const ShaderParameters params{system,    disk_cache, device,
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
index 91b1b16a5..8cedeaeba 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
@@ -27,12 +27,18 @@
 #include "video_core/renderer_vulkan/vk_update_descriptor.h"
 #include "video_core/renderer_vulkan/wrapper.h"
 #include "video_core/shader/compiler_settings.h"
+#include "video_core/shader/memory_util.h"
 
 namespace Vulkan {
 
 MICROPROFILE_DECLARE(Vulkan_PipelineCache);
 
 using Tegra::Engines::ShaderType;
+using VideoCommon::Shader::GetShaderAddress;
+using VideoCommon::Shader::GetShaderCode;
+using VideoCommon::Shader::KERNEL_MAIN_OFFSET;
+using VideoCommon::Shader::ProgramCode;
+using VideoCommon::Shader::STAGE_MAIN_OFFSET;
 
 namespace {
 
@@ -45,60 +51,6 @@ constexpr VkDescriptorType STORAGE_IMAGE = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
 constexpr VideoCommon::Shader::CompilerSettings compiler_settings{
     VideoCommon::Shader::CompileDepth::FullDecompile};
 
-/// Gets the address for the specified shader stage program
-GPUVAddr GetShaderAddress(Core::System& system, Maxwell::ShaderProgram program) {
-    const auto& gpu{system.GPU().Maxwell3D()};
-    const auto& shader_config{gpu.regs.shader_config[static_cast<std::size_t>(program)]};
-    return gpu.regs.code_address.CodeAddress() + shader_config.offset;
-}
-
-/// Gets if the current instruction offset is a scheduler instruction
-constexpr bool IsSchedInstruction(std::size_t offset, std::size_t main_offset) {
-    // Sched instructions appear once every 4 instructions.
-    constexpr std::size_t SchedPeriod = 4;
-    const std::size_t absolute_offset = offset - main_offset;
-    return (absolute_offset % SchedPeriod) == 0;
-}
-
-/// Calculates the size of a program stream
-std::size_t CalculateProgramSize(const ProgramCode& program, bool is_compute) {
-    const std::size_t start_offset = is_compute ? 0 : 10;
-    // This is the encoded version of BRA that jumps to itself. All Nvidia
-    // shaders end with one.
-    constexpr u64 self_jumping_branch = 0xE2400FFFFF07000FULL;
-    constexpr u64 mask = 0xFFFFFFFFFF7FFFFFULL;
-    std::size_t offset = start_offset;
-    while (offset < program.size()) {
-        const u64 instruction = program[offset];
-        if (!IsSchedInstruction(offset, start_offset)) {
-            if ((instruction & mask) == self_jumping_branch) {
-                // End on Maxwell's "nop" instruction
-                break;
-            }
-            if (instruction == 0) {
-                break;
-            }
-        }
-        ++offset;
-    }
-    // The last instruction is included in the program size
-    return std::min(offset + 1, program.size());
-}
-
-/// Gets the shader program code from memory for the specified address
-ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, const GPUVAddr gpu_addr,
-                          const u8* host_ptr, bool is_compute) {
-    ProgramCode program_code(VideoCommon::Shader::MAX_PROGRAM_LENGTH);
-    ASSERT_OR_EXECUTE(host_ptr != nullptr, {
-        std::fill(program_code.begin(), program_code.end(), 0);
-        return program_code;
-    });
-    memory_manager.ReadBlockUnsafe(gpu_addr, program_code.data(),
-                                   program_code.size() * sizeof(u64));
-    program_code.resize(CalculateProgramSize(program_code, is_compute));
-    return program_code;
-}
-
 constexpr std::size_t GetStageFromProgram(std::size_t program) {
     return program == 0 ? 0 : program - 1;
 }
@@ -212,9 +164,9 @@ std::array<Shader, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() {
             const auto host_ptr{memory_manager.GetPointer(program_addr)};
 
             // No shader found - create a new one
-            constexpr u32 stage_offset = 10;
+            constexpr u32 stage_offset = STAGE_MAIN_OFFSET;
             const auto stage = static_cast<Tegra::Engines::ShaderType>(index == 0 ? 0 : index - 1);
-            auto code = GetShaderCode(memory_manager, program_addr, host_ptr, false);
+            ProgramCode code = GetShaderCode(memory_manager, program_addr, host_ptr, false);
 
             shader = std::make_shared<CachedShader>(system, stage, program_addr, *cpu_addr,
                                                     std::move(code), stage_offset);
@@ -270,11 +222,10 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach
         // No shader found - create a new one
         const auto host_ptr = memory_manager.GetPointer(program_addr);
 
-        auto code = GetShaderCode(memory_manager, program_addr, host_ptr, true);
-        constexpr u32 kernel_main_offset = 0;
+        ProgramCode code = GetShaderCode(memory_manager, program_addr, host_ptr, true);
         shader = std::make_shared<CachedShader>(system, Tegra::Engines::ShaderType::Compute,
                                                 program_addr, *cpu_addr, std::move(code),
-                                                kernel_main_offset);
+                                                KERNEL_MAIN_OFFSET);
         if (cpu_addr) {
             Register(shader);
         } else {
diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
index 602a0a340..2cce53162 100644
--- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h
+++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h
@@ -25,6 +25,7 @@
 #include "video_core/renderer_vulkan/vk_resource_manager.h"
 #include "video_core/renderer_vulkan/vk_shader_decompiler.h"
 #include "video_core/renderer_vulkan/wrapper.h"
+#include "video_core/shader/memory_util.h"
 #include "video_core/shader/registry.h"
 #include "video_core/shader/shader_ir.h"
 #include "video_core/surface.h"
@@ -47,8 +48,6 @@ class CachedShader;
 using Shader = std::shared_ptr<CachedShader>;
 using Maxwell = Tegra::Engines::Maxwell3D::Regs;
 
-using ProgramCode = std::vector<u64>;
-
 struct GraphicsPipelineCacheKey {
     FixedPipelineState fixed_state;
     std::array<GPUVAddr, Maxwell::MaxShaderProgram> shaders;
@@ -113,7 +112,8 @@ namespace Vulkan {
 class CachedShader final : public RasterizerCacheObject {
 public:
     explicit CachedShader(Core::System& system, Tegra::Engines::ShaderType stage, GPUVAddr gpu_addr,
-                          VAddr cpu_addr, ProgramCode program_code, u32 main_offset);
+                          VAddr cpu_addr, VideoCommon::Shader::ProgramCode program_code,
+                          u32 main_offset);
     ~CachedShader();
 
     GPUVAddr GetGpuAddr() const {
@@ -145,7 +145,7 @@ private:
                                                                  Tegra::Engines::ShaderType stage);
 
     GPUVAddr gpu_addr{};
-    ProgramCode program_code;
+    VideoCommon::Shader::ProgramCode program_code;
     VideoCommon::Shader::Registry registry;
     VideoCommon::Shader::ShaderIR shader_ir;
     ShaderEntries entries;
diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp
index e00a3fb70..8d86020f6 100644
--- a/src/video_core/shader/control_flow.cpp
+++ b/src/video_core/shader/control_flow.cpp
@@ -13,6 +13,7 @@
 #include "common/common_types.h"
 #include "video_core/shader/ast.h"
 #include "video_core/shader/control_flow.h"
+#include "video_core/shader/memory_util.h"
 #include "video_core/shader/registry.h"
 #include "video_core/shader/shader_ir.h"
 
@@ -115,17 +116,6 @@ Pred GetPredicate(u32 index, bool negated) {
     return static_cast<Pred>(static_cast<u64>(index) + (negated ? 8ULL : 0ULL));
 }
 
-/**
- * Returns whether the instruction at the specified offset is a 'sched' instruction.
- * Sched instructions always appear before a sequence of 3 instructions.
- */
-constexpr bool IsSchedInstruction(u32 offset, u32 main_offset) {
-    constexpr u32 SchedPeriod = 4;
-    u32 absolute_offset = offset - main_offset;
-
-    return (absolute_offset % SchedPeriod) == 0;
-}
-
 enum class ParseResult : u32 {
     ControlCaught,
     BlockEnd,
diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp
index 87ac9ac6c..1167ff4ec 100644
--- a/src/video_core/shader/decode.cpp
+++ b/src/video_core/shader/decode.cpp
@@ -13,6 +13,7 @@
 #include "video_core/engines/shader_bytecode.h"
 #include "video_core/engines/shader_header.h"
 #include "video_core/shader/control_flow.h"
+#include "video_core/shader/memory_util.h"
 #include "video_core/shader/node_helper.h"
 #include "video_core/shader/shader_ir.h"
 
@@ -23,17 +24,6 @@ using Tegra::Shader::OpCode;
 
 namespace {
 
-/**
- * Returns whether the instruction at the specified offset is a 'sched' instruction.
- * Sched instructions always appear before a sequence of 3 instructions.
- */
-constexpr bool IsSchedInstruction(u32 offset, u32 main_offset) {
-    constexpr u32 SchedPeriod = 4;
-    u32 absolute_offset = offset - main_offset;
-
-    return (absolute_offset % SchedPeriod) == 0;
-}
-
 void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile& gpu_driver,
                               const std::list<Sampler>& used_samplers) {
     if (gpu_driver.IsTextureHandlerSizeKnown() || used_samplers.size() <= 1) {
diff --git a/src/video_core/shader/memory_util.cpp b/src/video_core/shader/memory_util.cpp
new file mode 100644
index 000000000..074f21691
--- /dev/null
+++ b/src/video_core/shader/memory_util.cpp
@@ -0,0 +1,77 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <cstddef>
+
+#include <boost/container_hash/hash.hpp>
+
+#include "common/common_types.h"
+#include "core/core.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/memory_manager.h"
+#include "video_core/shader/memory_util.h"
+#include "video_core/shader/shader_ir.h"
+
+namespace VideoCommon::Shader {
+
+GPUVAddr GetShaderAddress(Core::System& system,
+                          Tegra::Engines::Maxwell3D::Regs::ShaderProgram program) {
+    const auto& gpu{system.GPU().Maxwell3D()};
+    const auto& shader_config{gpu.regs.shader_config[static_cast<std::size_t>(program)]};
+    return gpu.regs.code_address.CodeAddress() + shader_config.offset;
+}
+
+bool IsSchedInstruction(std::size_t offset, std::size_t main_offset) {
+    // Sched instructions appear once every 4 instructions.
+    constexpr std::size_t SchedPeriod = 4;
+    const std::size_t absolute_offset = offset - main_offset;
+    return (absolute_offset % SchedPeriod) == 0;
+}
+
+std::size_t CalculateProgramSize(const ProgramCode& program, bool is_compute) {
+    // This is the encoded version of BRA that jumps to itself. All Nvidia
+    // shaders end with one.
+    static constexpr u64 SELF_JUMPING_BRANCH = 0xE2400FFFFF07000FULL;
+    static constexpr u64 MASK = 0xFFFFFFFFFF7FFFFFULL;
+
+    const std::size_t start_offset = is_compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET;
+    std::size_t offset = start_offset;
+    while (offset < program.size()) {
+        const u64 instruction = program[offset];
+        if (!IsSchedInstruction(offset, start_offset)) {
+            if ((instruction & MASK) == SELF_JUMPING_BRANCH) {
+                // End on Maxwell's "nop" instruction
+                break;
+            }
+            if (instruction == 0) {
+                break;
+            }
+        }
+        ++offset;
+    }
+    // The last instruction is included in the program size
+    return std::min(offset + 1, program.size());
+}
+
+ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, GPUVAddr gpu_addr,
+                          const u8* host_ptr, bool is_compute) {
+    ProgramCode code(VideoCommon::Shader::MAX_PROGRAM_LENGTH);
+    ASSERT_OR_EXECUTE(host_ptr != nullptr, { return code; });
+    memory_manager.ReadBlockUnsafe(gpu_addr, code.data(), code.size() * sizeof(u64));
+    code.resize(CalculateProgramSize(code, is_compute));
+    return code;
+}
+
+u64 GetUniqueIdentifier(Tegra::Engines::ShaderType shader_type, bool is_a, const ProgramCode& code,
+                        const ProgramCode& code_b) {
+    u64 unique_identifier = boost::hash_value(code);
+    if (is_a) {
+        // VertexA programs include two programs
+        boost::hash_combine(unique_identifier, boost::hash_value(code_b));
+    }
+    return unique_identifier;
+}
+
+} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/memory_util.h b/src/video_core/shader/memory_util.h
new file mode 100644
index 000000000..be90d24fd
--- /dev/null
+++ b/src/video_core/shader/memory_util.h
@@ -0,0 +1,47 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <cstddef>
+#include <vector>
+
+#include "common/common_types.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/engines/shader_type.h"
+
+namespace Core {
+class System;
+}
+
+namespace Tegra {
+class MemoryManager;
+}
+
+namespace VideoCommon::Shader {
+
+using ProgramCode = std::vector<u64>;
+
+constexpr u32 STAGE_MAIN_OFFSET = 10;
+constexpr u32 KERNEL_MAIN_OFFSET = 0;
+
+/// Gets the address for the specified shader stage program
+GPUVAddr GetShaderAddress(Core::System& system,
+                          Tegra::Engines::Maxwell3D::Regs::ShaderProgram program);
+
+/// Gets if the current instruction offset is a scheduler instruction
+bool IsSchedInstruction(std::size_t offset, std::size_t main_offset);
+
+/// Calculates the size of a program stream
+std::size_t CalculateProgramSize(const ProgramCode& program, bool is_compute);
+
+/// Gets the shader program code from memory for the specified address
+ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, GPUVAddr gpu_addr,
+                          const u8* host_ptr, bool is_compute);
+
+/// Hashes one (or two) program streams
+u64 GetUniqueIdentifier(Tegra::Engines::ShaderType shader_type, bool is_a, const ProgramCode& code,
+                        const ProgramCode& code_b = {});
+
+} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index c6e7bdf50..69de5e68b 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -18,6 +18,7 @@
 #include "video_core/engines/shader_header.h"
 #include "video_core/shader/ast.h"
 #include "video_core/shader/compiler_settings.h"
+#include "video_core/shader/memory_util.h"
 #include "video_core/shader/node.h"
 #include "video_core/shader/registry.h"
 
@@ -25,8 +26,6 @@ namespace VideoCommon::Shader {
 
 struct ShaderBlock;
 
-using ProgramCode = std::vector<u64>;
-
 constexpr u32 MAX_PROGRAM_LENGTH = 0x1000;
 
 class ConstBuffer {