From 9b8e9623683da728f98019b096517704eb946d21 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Mon, 4 May 2020 17:49:48 -0300
Subject: [PATCH 1/3] maxwell_3d: Add viewport swizzles

---
 src/video_core/engines/maxwell_3d.cpp |  6 ++++++
 src/video_core/engines/maxwell_3d.h   | 19 ++++++++++++++++++-
 2 files changed, 24 insertions(+), 1 deletion(-)

diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 7db055ea0..8155f6e2e 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -44,6 +44,12 @@ void Maxwell3D::InitializeRegisterDefaults() {
         viewport.depth_range_near = 0.0f;
         viewport.depth_range_far = 1.0f;
     }
+    for (auto& viewport : regs.viewport_transform) {
+        viewport.swizzle.x.Assign(Regs::ViewportSwizzle::PositiveX);
+        viewport.swizzle.y.Assign(Regs::ViewportSwizzle::PositiveY);
+        viewport.swizzle.z.Assign(Regs::ViewportSwizzle::PositiveZ);
+        viewport.swizzle.w.Assign(Regs::ViewportSwizzle::PositiveW);
+    }
 
     // Doom and Bomberman seems to use the uninitialized registers and just enable blend
     // so initialize blend registers with sane values
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 864924ff3..4aeff4cc9 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -575,6 +575,17 @@ public:
             Replay = 3,
         };
 
+        enum class ViewportSwizzle : u32 { // Values are stored in 3-bit BitFields.
+            PositiveX = 0,
+            NegativeX = 1,
+            PositiveY = 2,
+            NegativeY = 3,
+            PositiveZ = 4,
+            NegativeZ = 5,
+            PositiveW = 6,
+            NegativeW = 7, // Keep order: MaxwellToGL offsets a GL token by these.
+        };
+
         struct RenderTargetConfig {
             u32 address_high;
             u32 address_low;
@@ -618,7 +629,13 @@ public:
             f32 translate_x;
             f32 translate_y;
             f32 translate_z;
-            INSERT_UNION_PADDING_WORDS(2);
+            union {
+                BitField<0, 3, ViewportSwizzle> x;
+                BitField<4, 3, ViewportSwizzle> y;
+                BitField<8, 3, ViewportSwizzle> z;
+                BitField<12, 3, ViewportSwizzle> w;
+            } swizzle;
+            INSERT_UNION_PADDING_WORDS(1);
 
             Common::Rectangle<f32> GetRect() const {
                 return {

From f813cd3ff76dd7e0011b429a325217b5501c158f Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Mon, 4 May 2020 17:51:30 -0300
Subject: [PATCH 2/3] gl_rasterizer: Implement viewport swizzles with
 NV_viewport_swizzle

---
 src/video_core/renderer_opengl/gl_rasterizer.cpp | 8 ++++++++
 src/video_core/renderer_opengl/maxwell_to_gl.h   | 5 +++++
 2 files changed, 13 insertions(+)

diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 8b3b3ce92..69dcf952f 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -1019,6 +1019,14 @@ void RasterizerOpenGL::SyncViewport() {
             const GLdouble near_depth = src.translate_z - src.scale_z * reduce_z;
             const GLdouble far_depth = src.translate_z + src.scale_z;
             glDepthRangeIndexed(static_cast<GLuint>(i), near_depth, far_depth);
+
+            if (!GLAD_GL_NV_viewport_swizzle) {
+                continue;
+            }
+            glViewportSwizzleNV(static_cast<GLuint>(i), MaxwellToGL::ViewportSwizzle(src.swizzle.x),
+                                MaxwellToGL::ViewportSwizzle(src.swizzle.y),
+                                MaxwellToGL::ViewportSwizzle(src.swizzle.z),
+                                MaxwellToGL::ViewportSwizzle(src.swizzle.w));
         }
     }
 }
diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h
index 2c0c77c28..994ae98eb 100644
--- a/src/video_core/renderer_opengl/maxwell_to_gl.h
+++ b/src/video_core/renderer_opengl/maxwell_to_gl.h
@@ -503,5 +503,10 @@ inline GLenum PolygonMode(Maxwell::PolygonMode polygon_mode) {
     return GL_FILL;
 }
 
+inline GLenum ViewportSwizzle(Maxwell::ViewportSwizzle swizzle) {
+    // Guest encodings are in the same order as the GL tokens, so an offset is enough.
+    return static_cast<GLenum>(swizzle) + GL_VIEWPORT_SWIZZLE_POSITIVE_X_NV;
+}
+
 } // namespace MaxwellToGL
 } // namespace OpenGL

From 2dbf5290f28efa47b2484537e9316909831fee05 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Mon, 4 May 2020 18:31:17 -0300
Subject: [PATCH 3/3] vk_graphics_pipeline: Implement viewport swizzles with
 NV_viewport_swizzle

---
 src/video_core/engines/maxwell_3d.h           |  1 +
 .../renderer_vulkan/fixed_pipeline_state.cpp  |  8 +++++
 .../renderer_vulkan/fixed_pipeline_state.h    |  7 ++++
 .../renderer_vulkan/maxwell_to_vk.cpp         | 23 +++++++++++++
 .../renderer_vulkan/maxwell_to_vk.h           |  2 ++
 src/video_core/renderer_vulkan/vk_device.cpp  |  5 +++
 src/video_core/renderer_vulkan/vk_device.h    |  6 ++++
 .../renderer_vulkan/vk_graphics_pipeline.cpp  | 32 +++++++++++++++++++
 8 files changed, 84 insertions(+)

diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 4aeff4cc9..89e29a0d3 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -630,6 +630,7 @@ public:
             f32 translate_y;
             f32 translate_z;
             union {
+                u32 raw;
                 BitField<0, 3, ViewportSwizzle> x;
                 BitField<4, 3, ViewportSwizzle> y;
                 BitField<8, 3, ViewportSwizzle> z;
diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
index 648b1e71b..6cead3a28 100644
--- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
+++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp
@@ -2,6 +2,7 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#include <algorithm>
 #include <cstring>
 #include <tuple>
 
@@ -101,6 +102,12 @@ void FixedPipelineState::ColorBlending::Fill(const Maxwell& regs) noexcept {
     }
 }
 
+void FixedPipelineState::ViewportSwizzles::Fill(const Maxwell& regs) noexcept {
+    const auto narrow = [](const auto& entry) { return static_cast<u16>(entry.swizzle.raw); };
+    std::transform(regs.viewport_transform.begin(), regs.viewport_transform.end(),
+                   swizzles.begin(), narrow); // keeps the low 16 bits of each swizzle word
+}
+
 void FixedPipelineState::BlendingAttachment::Fill(const Maxwell& regs, std::size_t index) {
     const auto& mask = regs.color_mask[regs.color_mask_common ? 0 : index];
 
@@ -144,6 +151,7 @@ void FixedPipelineState::Fill(const Maxwell& regs) {
     rasterizer.Fill(regs);
     depth_stencil.Fill(regs);
     color_blending.Fill(regs);
+    viewport_swizzles.Fill(regs);
 }
 
 std::size_t FixedPipelineState::Hash() const noexcept {
diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h
index 8652067a7..cecaee48d 100644
--- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h
+++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h
@@ -233,10 +233,17 @@ struct FixedPipelineState {
         void Fill(const Maxwell& regs) noexcept;
     };
 
+    struct ViewportSwizzles {
+        std::array<u16, Maxwell::NumViewports> swizzles; ///< Raw swizzle words cut down to u16.
+
+        void Fill(const Maxwell& regs) noexcept; ///< Reads regs.viewport_transform.
+    };
+
     VertexInput vertex_input;
     Rasterizer rasterizer;
     DepthStencil depth_stencil;
     ColorBlending color_blending;
+    ViewportSwizzles viewport_swizzles;
 
     void Fill(const Maxwell& regs);
 
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
index 8681b821f..850165606 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
@@ -672,4 +672,27 @@ VkComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle) {
     return {};
 }
 
+VkViewportCoordinateSwizzleNV ViewportSwizzle(Maxwell::ViewportSwizzle swizzle) {
+    switch (swizzle) { // One case per Maxwell::ViewportSwizzle enumerator.
+    case Maxwell::ViewportSwizzle::PositiveX:
+        return VK_VIEWPORT_COORDINATE_SWIZZLE_POSITIVE_X_NV;
+    case Maxwell::ViewportSwizzle::NegativeX:
+        return VK_VIEWPORT_COORDINATE_SWIZZLE_NEGATIVE_X_NV;
+    case Maxwell::ViewportSwizzle::PositiveY:
+        return VK_VIEWPORT_COORDINATE_SWIZZLE_POSITIVE_Y_NV;
+    case Maxwell::ViewportSwizzle::NegativeY:
+        return VK_VIEWPORT_COORDINATE_SWIZZLE_NEGATIVE_Y_NV;
+    case Maxwell::ViewportSwizzle::PositiveZ:
+        return VK_VIEWPORT_COORDINATE_SWIZZLE_POSITIVE_Z_NV;
+    case Maxwell::ViewportSwizzle::NegativeZ:
+        return VK_VIEWPORT_COORDINATE_SWIZZLE_NEGATIVE_Z_NV;
+    case Maxwell::ViewportSwizzle::PositiveW:
+        return VK_VIEWPORT_COORDINATE_SWIZZLE_POSITIVE_W_NV;
+    case Maxwell::ViewportSwizzle::NegativeW:
+        return VK_VIEWPORT_COORDINATE_SWIZZLE_NEGATIVE_W_NV;
+    }
+    UNREACHABLE_MSG("Invalid swizzle={}", static_cast<int>(swizzle));
+    return {}; // Only reached if UNREACHABLE_MSG returns; yields a value-initialized enum.
+}
+
 } // namespace Vulkan::MaxwellToVK
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.h b/src/video_core/renderer_vulkan/maxwell_to_vk.h
index 81bce4c6c..7e213452f 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.h
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.h
@@ -59,4 +59,6 @@ VkCullModeFlags CullFace(Maxwell::CullFace cull_face);
 
 VkComponentSwizzle SwizzleSource(Tegra::Texture::SwizzleSource swizzle);
 
+VkViewportCoordinateSwizzleNV ViewportSwizzle(Maxwell::ViewportSwizzle swizzle);
+
 } // namespace Vulkan::MaxwellToVK
diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp
index 0e4bbca97..e1f65e3a7 100644
--- a/src/video_core/renderer_vulkan/vk_device.cpp
+++ b/src/video_core/renderer_vulkan/vk_device.cpp
@@ -260,6 +260,10 @@ bool VKDevice::Create() {
         LOG_INFO(Render_Vulkan, "Device doesn't support float16 natively");
     }
 
+    if (!nv_viewport_swizzle) {
+        LOG_INFO(Render_Vulkan, "Device doesn't support viewport swizzles");
+    }
+
     VkPhysicalDeviceUniformBufferStandardLayoutFeaturesKHR std430_layout;
     if (khr_uniform_buffer_standard_layout) {
         std430_layout.sType =
@@ -521,6 +525,7 @@ std::vector<const char*> VKDevice::LoadExtensions() {
     bool has_ext_subgroup_size_control{};
     bool has_ext_transform_feedback{};
     for (const auto& extension : physical.EnumerateDeviceExtensionProperties()) {
+        Test(extension, nv_viewport_swizzle, VK_NV_VIEWPORT_SWIZZLE_EXTENSION_NAME, true);
         Test(extension, khr_uniform_buffer_standard_layout,
              VK_KHR_UNIFORM_BUFFER_STANDARD_LAYOUT_EXTENSION_NAME, true);
         Test(extension, has_khr_shader_float16_int8, VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME,
diff --git a/src/video_core/renderer_vulkan/vk_device.h b/src/video_core/renderer_vulkan/vk_device.h
index c8640762d..12b05651b 100644
--- a/src/video_core/renderer_vulkan/vk_device.h
+++ b/src/video_core/renderer_vulkan/vk_device.h
@@ -147,6 +147,11 @@ public:
         return is_formatless_image_load_supported;
     }
 
+    /// Returns true if the device supports VK_NV_viewport_swizzle, as probed by LoadExtensions.
+    bool IsNvViewportSwizzleSupported() const {
+        return nv_viewport_swizzle;
+    }
+
     /// Returns true if the device supports VK_EXT_scalar_block_layout.
     bool IsKhrUniformBufferStandardLayoutSupported() const {
         return khr_uniform_buffer_standard_layout;
@@ -222,6 +227,7 @@ private:
     bool is_float16_supported{};            ///< Support for float16 arithmetics.
     bool is_warp_potentially_bigger{};      ///< Host warp size can be bigger than guest.
     bool is_formatless_image_load_supported{}; ///< Support for shader image read without format.
+    bool nv_viewport_swizzle{};                ///< Support for VK_NV_viewport_swizzle.
     bool khr_uniform_buffer_standard_layout{}; ///< Support for std430 on UBOs.
     bool ext_index_type_uint8{};               ///< Support for VK_EXT_index_type_uint8.
     bool ext_depth_range_unrestricted{};       ///< Support for VK_EXT_depth_range_unrestricted.
diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
index 1ac981974..5beea6a03 100644
--- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
+++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp
@@ -2,6 +2,7 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#include <algorithm>
 #include <array>
 #include <cstring>
 #include <vector>
@@ -50,6 +51,23 @@ bool SupportsPrimitiveRestart(VkPrimitiveTopology topology) {
                      topology) == std::end(unsupported_topologies);
 }
 
+VkViewportSwizzleNV UnpackViewportSwizzle(u16 swizzle) {
+    // Reinterpret the packed value through the same bit layout as the guest swizzle word.
+    union {
+        u32 raw;
+        BitField<0, 3, Maxwell::ViewportSwizzle> x;
+        BitField<4, 3, Maxwell::ViewportSwizzle> y;
+        BitField<8, 3, Maxwell::ViewportSwizzle> z;
+        BitField<12, 3, Maxwell::ViewportSwizzle> w;
+    } const packed{swizzle};
+    return VkViewportSwizzleNV{
+        MaxwellToVK::ViewportSwizzle(packed.x),
+        MaxwellToVK::ViewportSwizzle(packed.y),
+        MaxwellToVK::ViewportSwizzle(packed.z),
+        MaxwellToVK::ViewportSwizzle(packed.w),
+    };
+}
+
 } // Anonymous namespace
 
 VKGraphicsPipeline::VKGraphicsPipeline(const VKDevice& device, VKScheduler& scheduler,
@@ -162,6 +180,7 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa
     const auto& ds = fixed_state.depth_stencil;
     const auto& cd = fixed_state.color_blending;
     const auto& rs = fixed_state.rasterizer;
+    const auto& viewport_swizzles = fixed_state.viewport_swizzles.swizzles;
 
     std::vector<VkVertexInputBindingDescription> vertex_bindings;
     std::vector<VkVertexInputBindingDivisorDescriptionEXT> vertex_binding_divisors;
@@ -244,6 +263,19 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa
     viewport_ci.scissorCount = Maxwell::NumViewports;
     viewport_ci.pScissors = nullptr;
 
+    std::array<VkViewportSwizzleNV, Maxwell::NumViewports> swizzles;
+    std::transform(viewport_swizzles.begin(), viewport_swizzles.end(), swizzles.begin(),
+                   UnpackViewportSwizzle);
+    VkPipelineViewportSwizzleStateCreateInfoNV swizzle_ci;
+    swizzle_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_SWIZZLE_STATE_CREATE_INFO_NV;
+    swizzle_ci.pNext = nullptr;
+    swizzle_ci.flags = 0;
+    swizzle_ci.viewportCount = Maxwell::NumViewports;
+    swizzle_ci.pViewportSwizzles = swizzles.data();
+    if (device.IsNvViewportSwizzleSupported()) {
+        viewport_ci.pNext = &swizzle_ci;
+    }
+
     VkPipelineRasterizationStateCreateInfo rasterization_ci;
     rasterization_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO;
     rasterization_ci.pNext = nullptr;