From 0be835132cce4455f5b770bffb34dc0292432383 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Sat, 2 Feb 2019 23:43:11 -0300
Subject: [PATCH 1/4] shader_ir/memory: Add LD_L 64-bit loads

---
 src/video_core/shader/decode/memory.cpp | 21 ++++++++++++++++-----
 1 file changed, 16 insertions(+), 5 deletions(-)

diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp
index 4d075f088..63965525c 100644
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -104,16 +104,27 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, const BasicBlock& code, u32 pc) {
     }
     case OpCode::Id::LD_L: {
         UNIMPLEMENTED_IF_MSG(instr.ld_l.unknown == 1, "LD_L Unhandled mode: {}",
-                             static_cast<unsigned>(instr.ld_l.unknown.Value()));
+                             static_cast<u32>(instr.ld_l.unknown.Value()));
 
-        const Node index = Operation(OperationCode::IAdd, GetRegister(instr.gpr8),
-                                     Immediate(static_cast<s32>(instr.smem_imm)));
-        const Node lmem = GetLocalMemory(index);
+        const auto GetLmem = [&](s32 offset) {
+            ASSERT(offset % 4 == 0);
+            const Node immediate_offset = Immediate(static_cast<s32>(instr.smem_imm) + offset);
+            const Node address = Operation(OperationCode::IAdd, NO_PRECISE, GetRegister(instr.gpr8),
+                                           immediate_offset);
+            return GetLocalMemory(address);
+        };
 
         switch (instr.ldst_sl.type.Value()) {
         case Tegra::Shader::StoreType::Bytes32:
-            SetRegister(bb, instr.gpr0, lmem);
+            SetRegister(bb, instr.gpr0, GetLmem(0));
             break;
+        case Tegra::Shader::StoreType::Bytes64: {
+            SetTemporal(bb, 0, GetLmem(0));
+            SetTemporal(bb, 1, GetLmem(4));
+            SetRegister(bb, instr.gpr0, GetTemporal(0));
+            SetRegister(bb, instr.gpr0.Value() + 1, GetTemporal(1));
+            break;
+        }
         default:
             UNIMPLEMENTED_MSG("LD_L Unhandled type: {}",
                               static_cast<unsigned>(instr.ldst_sl.type.Value()));

From 9feb68085d05a265fd9ec7f26791390516cd3bd6 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Sat, 2 Feb 2019 23:44:38 -0300
Subject: [PATCH 2/4] shader_bytecode: Rename BytesN enums to BitsN

---
 src/video_core/engines/shader_bytecode.h | 6 +++---
 src/video_core/shader/decode/memory.cpp  | 8 ++++----
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 713b01c9f..2f5a966d2 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -217,9 +217,9 @@ enum class StoreType : u64 {
     Signed8 = 1,
     Unsigned16 = 2,
     Signed16 = 3,
-    Bytes32 = 4,
-    Bytes64 = 5,
-    Bytes128 = 6,
+    Bits32 = 4,
+    Bits64 = 5,
+    Bits128 = 6,
 };
 
 enum class IMinMaxExchange : u64 {
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp
index 63965525c..2321a37a2 100644
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -115,10 +115,10 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, const BasicBlock& code, u32 pc) {
         };
 
         switch (instr.ldst_sl.type.Value()) {
-        case Tegra::Shader::StoreType::Bytes32:
+        case Tegra::Shader::StoreType::Bits32:
             SetRegister(bb, instr.gpr0, GetLmem(0));
             break;
-        case Tegra::Shader::StoreType::Bytes64: {
+        case Tegra::Shader::StoreType::Bits64: {
             SetTemporal(bb, 0, GetLmem(0));
             SetTemporal(bb, 1, GetLmem(4));
             SetRegister(bb, instr.gpr0, GetTemporal(0));
@@ -127,7 +127,7 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, const BasicBlock& code, u32 pc) {
         }
         default:
             UNIMPLEMENTED_MSG("LD_L Unhandled type: {}",
-                              static_cast<unsigned>(instr.ldst_sl.type.Value()));
+                              static_cast<u32>(instr.ldst_sl.type.Value()));
         }
         break;
     }
@@ -217,7 +217,7 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, const BasicBlock& code, u32 pc) {
                                      Immediate(static_cast<s32>(instr.smem_imm)));
 
         switch (instr.ldst_sl.type.Value()) {
-        case Tegra::Shader::StoreType::Bytes32:
+        case Tegra::Shader::StoreType::Bits32:
             SetLocalMemory(bb, index, GetRegister(instr.gpr0));
             break;
         default:

From f61c1ed2466d209b5b1ff09c52c664fe1a8e5a60 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Sun, 3 Feb 2019 00:35:20 -0300
Subject: [PATCH 3/4] shader_ir/memory: Add LD_L 128-bit loads

---
 src/video_core/shader/decode/memory.cpp | 26 ++++++++++++++++++-------
 1 file changed, 19 insertions(+), 7 deletions(-)

diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp
index 2321a37a2..6eb36b1e7 100644
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -116,13 +116,25 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, const BasicBlock& code, u32 pc) {
 
         switch (instr.ldst_sl.type.Value()) {
         case Tegra::Shader::StoreType::Bits32:
-            SetRegister(bb, instr.gpr0, GetLmem(0));
-            break;
-        case Tegra::Shader::StoreType::Bits64: {
-            SetTemporal(bb, 0, GetLmem(0));
-            SetTemporal(bb, 1, GetLmem(4));
-            SetRegister(bb, instr.gpr0, GetTemporal(0));
-            SetRegister(bb, instr.gpr0.Value() + 1, GetTemporal(1));
+        case Tegra::Shader::StoreType::Bits64:
+        case Tegra::Shader::StoreType::Bits128: {
+            const u32 count = [&]() {
+                switch (instr.ldst_sl.type.Value()) {
+                case Tegra::Shader::StoreType::Bits32:
+                    return 1;
+                case Tegra::Shader::StoreType::Bits64:
+                    return 2;
+                case Tegra::Shader::StoreType::Bits128:
+                    return 4;
+                default:
+                    UNREACHABLE();
+                    return 0;
+                }
+            }();
+            for (u32 i = 0; i < count; ++i)
+                SetTemporal(bb, i, GetLmem(i * 4));
+            for (u32 i = 0; i < count; ++i)
+                SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i));
             break;
         }
         default:

From dfa7be5ddf658622eb77ada4974ce9dfe4c6eda7 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Sun, 3 Feb 2019 19:08:10 -0300
Subject: [PATCH 4/4] shader_ir/memory: Add ST_L 64-bit and 128-bit stores

---
 src/video_core/shader/decode/memory.cpp | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp
index 6eb36b1e7..bff40071b 100644
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -225,12 +225,20 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, const BasicBlock& code, u32 pc) {
         UNIMPLEMENTED_IF_MSG(instr.st_l.unknown == 0, "ST_L Unhandled mode: {}",
                              static_cast<u32>(instr.st_l.unknown.Value()));
 
-        const Node index = Operation(OperationCode::IAdd, NO_PRECISE, GetRegister(instr.gpr8),
-                                     Immediate(static_cast<s32>(instr.smem_imm)));
+        const auto GetLmemAddr = [&](s32 offset) {
+            ASSERT(offset % 4 == 0);
+            const Node immediate = Immediate(static_cast<s32>(instr.smem_imm) + offset);
+            return Operation(OperationCode::IAdd, NO_PRECISE, GetRegister(instr.gpr8), immediate);
+        };
 
         switch (instr.ldst_sl.type.Value()) {
+        case Tegra::Shader::StoreType::Bits128:
+            SetLocalMemory(bb, GetLmemAddr(12), GetRegister(instr.gpr0.Value() + 3));
+            SetLocalMemory(bb, GetLmemAddr(8), GetRegister(instr.gpr0.Value() + 2));
+        case Tegra::Shader::StoreType::Bits64:
+            SetLocalMemory(bb, GetLmemAddr(4), GetRegister(instr.gpr0.Value() + 1));
         case Tegra::Shader::StoreType::Bits32:
-            SetLocalMemory(bb, index, GetRegister(instr.gpr0));
+            SetLocalMemory(bb, GetLmemAddr(0), GetRegister(instr.gpr0));
             break;
         default:
             UNIMPLEMENTED_MSG("ST_L Unhandled type: {}",