From 114d6b2f97eb62c7d8c958ebb391b70b026130f9 Mon Sep 17 00:00:00 2001
From: Yuri Kunde Schlesner <yuriks@yuriks.net>
Date: Sat, 17 Dec 2016 01:21:16 -0800
Subject: [PATCH] VideoCore/Shader: Split interpreter and JIT into separate
 ShaderEngines

---
 src/video_core/CMakeLists.txt                |   2 +
 src/video_core/pica.cpp                      |   2 +-
 src/video_core/shader/shader.cpp             | 106 ++++---------------
 src/video_core/shader/shader.h               |   5 +-
 src/video_core/shader/shader_interpreter.cpp |  39 ++++++-
 src/video_core/shader/shader_interpreter.h   |  19 ++--
 src/video_core/shader/shader_jit_x64.cpp     |  56 ++++++++++
 src/video_core/shader/shader_jit_x64.h       |  35 ++++++
 8 files changed, 160 insertions(+), 104 deletions(-)
 create mode 100644 src/video_core/shader/shader_jit_x64.cpp
 create mode 100644 src/video_core/shader/shader_jit_x64.h

diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 36397cce9..d55b84ce0 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -50,9 +50,11 @@ set(HEADERS
 
 if(ARCHITECTURE_x86_64)
     set(SRCS ${SRCS}
+            shader/shader_jit_x64.cpp
             shader/shader_jit_x64_compiler.cpp)
 
     set(HEADERS ${HEADERS}
+            shader/shader_jit_x64.h
             shader/shader_jit_x64_compiler.h)
 endif()
 
diff --git a/src/video_core/pica.cpp b/src/video_core/pica.cpp
index ce2bd455e..b4a77c632 100644
--- a/src/video_core/pica.cpp
+++ b/src/video_core/pica.cpp
@@ -499,7 +499,7 @@ void Init() {
 }
 
 void Shutdown() {
-    Shader::ClearCache();
+    Shader::Shutdown(); // Releases the shader JIT engine, if one was created
 }
 
 template <typename T>
diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp
index 97c6519d6..b30dae476 100644
--- a/src/video_core/shader/shader.cpp
+++ b/src/video_core/shader/shader.cpp
@@ -2,14 +2,8 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
-#include <atomic>
 #include <cmath>
 #include <cstring>
-#include <unordered_map>
-#include <utility>
-#include <boost/range/algorithm/fill.hpp>
-#include "common/bit_field.h"
-#include "common/hash.h"
 #include "common/logging/log.h"
 #include "common/microprofile.h"
 #include "video_core/pica.h"
@@ -17,7 +11,7 @@
 #include "video_core/shader/shader.h"
 #include "video_core/shader/shader_interpreter.h"
 #ifdef ARCHITECTURE_x86_64
-#include "video_core/shader/shader_jit_x64_compiler.h"
+#include "video_core/shader/shader_jit_x64.h"
 #endif // ARCHITECTURE_x86_64
 #include "video_core/video_core.h"
 
@@ -87,91 +81,31 @@ void UnitState::LoadInputVertex(const InputVertex& input, int num_attributes) {
     conditional_code[1] = false;
 }
 
-class MergedShaderEngine : public ShaderEngine {
-public:
-    void SetupBatch(const ShaderSetup* setup) override;
-    void Run(UnitState& state, unsigned int entry_point) const override;
-    DebugData<true> ProduceDebugInfo(const InputVertex& input, int num_attributes,
-        unsigned int entry_point) const override;
-
-private:
-    const ShaderSetup* setup = nullptr;
-};
-
-#ifdef ARCHITECTURE_x86_64
-static std::unordered_map<u64, std::unique_ptr<JitShader>> shader_map;
-static const JitShader* jit_shader;
-#endif // ARCHITECTURE_x86_64
-
-void ClearCache() {
-#ifdef ARCHITECTURE_x86_64
-    shader_map.clear();
-#endif // ARCHITECTURE_x86_64
-}
-
-void MergedShaderEngine::SetupBatch(const ShaderSetup* setup_) {
-    setup = setup_;
-    if (setup == nullptr)
-        return;
-
-#ifdef ARCHITECTURE_x86_64
-    if (VideoCore::g_shader_jit_enabled) {
-        u64 code_hash = Common::ComputeHash64(&setup->program_code, sizeof(setup->program_code));
-        u64 swizzle_hash = Common::ComputeHash64(&setup->swizzle_data, sizeof(setup->swizzle_data));
-
-        u64 cache_key = code_hash ^ swizzle_hash;
-        auto iter = shader_map.find(cache_key);
-        if (iter != shader_map.end()) {
-            jit_shader = iter->second.get();
-        } else {
-            auto shader = std::make_unique<JitShader>();
-            shader->Compile();
-            jit_shader = shader.get();
-            shader_map[cache_key] = std::move(shader);
-        }
-    }
-#endif // ARCHITECTURE_x86_64
-}
-
 MICROPROFILE_DEFINE(GPU_Shader, "GPU", "Shader", MP_RGB(50, 50, 240));
 
-void MergedShaderEngine::Run(UnitState& state, unsigned int entry_point) const {
-    ASSERT(setup != nullptr);
-    ASSERT(entry_point < 1024);
-
-    MICROPROFILE_SCOPE(GPU_Shader);
-
 #ifdef ARCHITECTURE_x86_64
-    if (VideoCore::g_shader_jit_enabled) {
-        jit_shader->Run(*setup, state, entry_point);
-    } else {
-        DebugData<false> dummy_debug_data;
-        RunInterpreter(*setup, state, dummy_debug_data, entry_point);
-    }
-#else
-    DebugData<false> dummy_debug_data;
-    RunInterpreter(*setup, state, dummy_debug_data, entry_point);
+static std::unique_ptr<JitX64Engine> jit_engine; // Null until GetEngine() first needs the JIT
 #endif // ARCHITECTURE_x86_64
-}
-
-DebugData<true> MergedShaderEngine::ProduceDebugInfo(const InputVertex& input, int num_attributes,
-                                                     unsigned int entry_point) const {
-    ASSERT(setup != nullptr);
-    ASSERT(entry_point < 1024);
-
-    UnitState state;
-    DebugData<true> debug_data;
-
-    // Setup input register table
-    boost::fill(state.registers.input, Math::Vec4<float24>::AssignToAll(float24::Zero()));
-    state.LoadInputVertex(input, num_attributes);
-    RunInterpreter(*setup, state, debug_data, entry_point);
-    return debug_data;
-}
+static InterpreterEngine interpreter_engine; // Returned by GetEngine() when the JIT is not used
 
 ShaderEngine* GetEngine() {
-    static MergedShaderEngine merged_engine;
-    return &merged_engine;
+#ifdef ARCHITECTURE_x86_64
+    // TODO(yuriks): Re-initialize on each change rather than being persistent
+    if (VideoCore::g_shader_jit_enabled) {
+        // The JIT engine is only constructed the first time it is asked for.
+        if (!jit_engine)
+            jit_engine = std::make_unique<JitX64Engine>();
+        return jit_engine.get();
+    }
+#endif // ARCHITECTURE_x86_64
+    // JIT disabled or not compiled in: fall back to the interpreter engine.
+    return &interpreter_engine;
+}
+
+void Shutdown() {
+#ifdef ARCHITECTURE_x86_64
+    jit_engine.reset(); // Frees the JIT engine (and the shader cache it owns), if one exists
+#endif // ARCHITECTURE_x86_64
 }
 
 } // namespace Shader
diff --git a/src/video_core/shader/shader.h b/src/video_core/shader/shader.h
index 899fb2607..2afd1024f 100644
--- a/src/video_core/shader/shader.h
+++ b/src/video_core/shader/shader.h
@@ -6,7 +6,6 @@
 
 #include <array>
 #include <cstddef>
-#include <memory>
 #include <type_traits>
 #include <nihstro/shader_bytecode.h>
 #include "common/assert.h"
@@ -152,9 +151,6 @@ struct UnitState {
     void LoadInputVertex(const InputVertex& input, int num_attributes);
 };
 
-/// Clears the shader cache
-void ClearCache();
-
 struct ShaderSetup {
     struct {
         // The float uniforms are accessed by the shader JIT using SSE instructions, and are
@@ -210,6 +206,7 @@ public:
 
 // TODO(yuriks): Remove and make it non-global state somewhere
 ShaderEngine* GetEngine();
+void Shutdown(); ///< Destroys the JIT engine, if GetEngine() has created one
 
 } // namespace Shader
 
diff --git a/src/video_core/shader/shader_interpreter.cpp b/src/video_core/shader/shader_interpreter.cpp
index 20fb9754b..8e2b8c548 100644
--- a/src/video_core/shader/shader_interpreter.cpp
+++ b/src/video_core/shader/shader_interpreter.cpp
@@ -7,10 +7,12 @@
 #include <cmath>
 #include <numeric>
 #include <boost/container/static_vector.hpp>
+#include <boost/range/algorithm/fill.hpp>
 #include <nihstro/shader_bytecode.h>
 #include "common/assert.h"
 #include "common/common_types.h"
 #include "common/logging/log.h"
+#include "common/microprofile.h"
 #include "common/vector_math.h"
 #include "video_core/pica_state.h"
 #include "video_core/pica_types.h"
@@ -37,8 +39,8 @@ struct CallStackElement {
 };
 
 template <bool Debug>
-void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData<Debug>& debug_data,
-                    unsigned offset) {
+static void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData<Debug>& debug_data,
+                           unsigned offset) {
     // TODO: Is there a maximal size for this?
     boost::container::static_vector<CallStackElement, 16> call_stack;
     u32 program_counter = offset;
@@ -647,9 +649,36 @@ void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData<Debug>
     }
 }
 
-// Explicit instantiation
-template void RunInterpreter(const ShaderSetup&, UnitState&, DebugData<false>&, unsigned offset);
-template void RunInterpreter(const ShaderSetup&, UnitState&, DebugData<true>&, unsigned offset);
+void InterpreterEngine::SetupBatch(const ShaderSetup* setup_) {
+    setup = setup_; // Stored as-is (may be null); Run()/ProduceDebugInfo() assert it is set
+}
+
+MICROPROFILE_DECLARE(GPU_Shader);
+
+void InterpreterEngine::Run(UnitState& state, unsigned int entry_point) const {
+    ASSERT(setup != nullptr);
+    ASSERT(entry_point < 1024);
+    MICROPROFILE_SCOPE(GPU_Shader);
+
+    // RunInterpreter always takes a DebugData argument; this one is a throwaway placeholder.
+    DebugData<false> unused_debug_data;
+    RunInterpreter(*setup, state, unused_debug_data, entry_point);
+}
+
+DebugData<true> InterpreterEngine::ProduceDebugInfo(const InputVertex& input, int num_attributes,
+                                                    unsigned int entry_point) const {
+    ASSERT(setup != nullptr);
+    ASSERT(entry_point < 1024);
+
+    // Run on a local unit whose input registers are zeroed before the vertex is loaded.
+    UnitState scratch_unit;
+    boost::fill(scratch_unit.registers.input, Math::Vec4<float24>::AssignToAll(float24::Zero()));
+    scratch_unit.LoadInputVertex(input, num_attributes);
+
+    DebugData<true> trace;
+    RunInterpreter(*setup, scratch_unit, trace, entry_point);
+    return trace;
+}
 
 } // namespace
 
diff --git a/src/video_core/shader/shader_interpreter.h b/src/video_core/shader/shader_interpreter.h
index 3237b50b3..43c1ed5ea 100644
--- a/src/video_core/shader/shader_interpreter.h
+++ b/src/video_core/shader/shader_interpreter.h
@@ -4,19 +4,22 @@
 
 #pragma once
 
+#include "video_core/shader/shader.h"
+
 namespace Pica {
 
 namespace Shader {
 
-struct ShaderSetup;
-struct UnitState;
+class InterpreterEngine final : public ShaderEngine {
+public:
+    void SetupBatch(const ShaderSetup* setup) override;
+    void Run(UnitState& state, unsigned int entry_point) const override;
+    DebugData<true> ProduceDebugInfo(const InputVertex& input, int num_attributes,
+                                     unsigned int entry_point) const override;
 
-template <bool Debug>
-struct DebugData;
-
-template <bool Debug>
-void RunInterpreter(const ShaderSetup& setup, UnitState& state, DebugData<Debug>& debug_data,
-                    unsigned offset);
+private:
+    const ShaderSetup* setup = nullptr; // Set by SetupBatch(); read by Run()/ProduceDebugInfo()
+};
 
 } // namespace
 
diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp
new file mode 100644
index 000000000..fea79538a
--- /dev/null
+++ b/src/video_core/shader/shader_jit_x64.cpp
@@ -0,0 +1,56 @@
+// Copyright 2016 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/hash.h"
+#include "common/microprofile.h"
+#include "video_core/shader/shader.h"
+#include "video_core/shader/shader_jit_x64.h"
+#include "video_core/shader/shader_jit_x64_compiler.h"
+
+namespace Pica {
+namespace Shader {
+
+JitX64Engine::JitX64Engine() = default;
+JitX64Engine::~JitX64Engine() = default; // Defined here, where JitShader is a complete type
+
+void JitX64Engine::SetupBatch(const ShaderSetup* setup_) {
+    setup = setup_;
+    cached_shader = nullptr;
+    if (!setup)
+        return;
+
+    // Shaders are identified solely by the XOR of the program-code and swizzle-data hashes.
+    u64 program_hash = Common::ComputeHash64(&setup->program_code, sizeof(setup->program_code));
+    u64 swizzle_hash = Common::ComputeHash64(&setup->swizzle_data, sizeof(setup->swizzle_data));
+    u64 key = program_hash ^ swizzle_hash;
+
+    auto entry = cache.find(key);
+    if (entry == cache.end()) {
+        // Key not cached yet: compile a new shader and keep it for later batches.
+        auto compiled = std::make_unique<JitShader>();
+        compiled->Compile();
+        entry = cache.emplace_hint(entry, key, std::move(compiled));
+    }
+    cached_shader = entry->second.get();
+}
+
+MICROPROFILE_DECLARE(GPU_Shader);
+
+void JitX64Engine::Run(UnitState& state, unsigned int entry_point) const {
+    // Both pointers are set by SetupBatch(), which must have been given a non-null setup.
+    ASSERT(setup != nullptr);
+    ASSERT(cached_shader != nullptr);
+    ASSERT(entry_point < 1024);
+    MICROPROFILE_SCOPE(GPU_Shader);
+
+    cached_shader->Run(*setup, state, entry_point);
+}
+
+// Not supported by the JIT. NOTE(review): no return; assumes UNIMPLEMENTED_MSG never returns.
+DebugData<true> JitX64Engine::ProduceDebugInfo(const InputVertex&, int, unsigned int) const {
+    UNIMPLEMENTED_MSG("Shader tracing/debugging is not supported by the JIT.");
+}
+
+} // namespace Shader
+} // namespace Pica
diff --git a/src/video_core/shader/shader_jit_x64.h b/src/video_core/shader/shader_jit_x64.h
new file mode 100644
index 000000000..df18de2c2
--- /dev/null
+++ b/src/video_core/shader/shader_jit_x64.h
@@ -0,0 +1,35 @@
+// Copyright 2016 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <memory>
+#include <unordered_map>
+#include "common/common_types.h"
+#include "video_core/shader/shader.h"
+
+namespace Pica {
+namespace Shader {
+
+class JitShader;
+
+class JitX64Engine final : public ShaderEngine {
+public:
+    JitX64Engine();
+    ~JitX64Engine() override; // Out of line: JitShader is only forward-declared in this header
+
+    void SetupBatch(const ShaderSetup* setup) override;
+    void Run(UnitState& state, unsigned int entry_point) const override;
+    DebugData<true> ProduceDebugInfo(const InputVertex& input, int num_attributes,
+                                     unsigned int entry_point) const override;
+
+private:
+    const ShaderSetup* setup = nullptr; // Setup passed to the last SetupBatch() call
+    // Compiled shaders, keyed by a hash of the setup's program code and swizzle data.
+    std::unordered_map<u64, std::unique_ptr<JitShader>> cache;
+    const JitShader* cached_shader = nullptr; // Shader picked by the last SetupBatch() call
+};
+
+} // namespace Shader
+} // namespace Pica