From b5bcd8c71b2d5fd0528191990b4e11bc916b5d7a Mon Sep 17 00:00:00 2001
From: ameerj <52414509+ameerj@users.noreply.github.com>
Date: Wed, 22 Feb 2023 00:48:12 -0500
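Subject: [PATCH] configuration: Add async ASTC decode setting

Add a switchable `async_astc` setting (default: off) and expose it in the
Qt advanced graphics tab, the Qt config reader/writer and the yuzu_cmd ini.

When the setting is on and the device has no native ASTC support, the
OpenGL and Vulkan image constructors set ImageFlagBits::AsynchronousDecode
instead of ImageFlagBits::AcceleratedUpload. On Vulkan such images are now
always flagged Converted, whichever path is chosen.

The ASTC Decompress() worker pool becomes a function-local static so it is
created once rather than on every call, and QueueAsyncDecode() logs each
queued decode at info level.

---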
 src/common/settings.cpp                         |  2 ++
 src/common/settings.h                           |  1 +
 .../renderer_opengl/gl_texture_cache.cpp        | 17 ++++++++++++++---
 .../renderer_vulkan/vk_texture_cache.cpp        |  7 ++++---
 src/video_core/texture_cache/texture_cache.h    |  1 +
 src/video_core/textures/astc.cpp                |  4 ++--
 src/yuzu/configuration/config.cpp               |  2 ++
 .../configure_graphics_advanced.cpp             |  7 +++++++
 .../configuration/configure_graphics_advanced.h |  1 +
 .../configure_graphics_advanced.ui              | 10 ++++++++++
 src/yuzu_cmd/config.cpp                         |  1 +
 src/yuzu_cmd/default_ini.h                      |  4 ++++
 12 files changed, 49 insertions(+), 8 deletions(-)

diff --git a/src/common/settings.cpp b/src/common/settings.cpp
index 49b41c158..70b02146b 100644
--- a/src/common/settings.cpp
+++ b/src/common/settings.cpp
@@ -59,6 +59,7 @@ void LogSettings() {
                 values.use_asynchronous_gpu_emulation.GetValue());
     log_setting("Renderer_NvdecEmulation", values.nvdec_emulation.GetValue());
     log_setting("Renderer_AccelerateASTC", values.accelerate_astc.GetValue());
+    log_setting("Renderer_AsyncASTC", values.async_astc.GetValue());
     log_setting("Renderer_UseVsync", values.use_vsync.GetValue());
     log_setting("Renderer_ShaderBackend", values.shader_backend.GetValue());
     log_setting("Renderer_UseAsynchronousShaders", values.use_asynchronous_shaders.GetValue());
@@ -212,6 +213,7 @@ void RestoreGlobalState(bool is_powered_on) {
     values.use_asynchronous_gpu_emulation.SetGlobal(true);
     values.nvdec_emulation.SetGlobal(true);
     values.accelerate_astc.SetGlobal(true);
+    values.async_astc.SetGlobal(true);
     values.use_vsync.SetGlobal(true);
     values.shader_backend.SetGlobal(true);
     values.use_asynchronous_shaders.SetGlobal(true);
diff --git a/src/common/settings.h b/src/common/settings.h
index 6d27dd5ee..512ecff69 100644
--- a/src/common/settings.h
+++ b/src/common/settings.h
@@ -453,6 +453,7 @@ struct Values {
     SwitchableSetting<bool> use_asynchronous_gpu_emulation{true, "use_asynchronous_gpu_emulation"};
     SwitchableSetting<NvdecEmulation> nvdec_emulation{NvdecEmulation::GPU, "nvdec_emulation"};
     SwitchableSetting<bool> accelerate_astc{true, "accelerate_astc"};
+    SwitchableSetting<bool> async_astc{false, "async_astc"};
     SwitchableSetting<bool> use_vsync{true, "use_vsync"};
     SwitchableSetting<ShaderBackend, true> shader_backend{ShaderBackend::GLSL, ShaderBackend::GLSL,
                                                           ShaderBackend::SPIRV, "shader_backend"};
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index eb6e43a08..b047e7b3d 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -228,8 +228,9 @@ void ApplySwizzle(GLuint handle, PixelFormat format, std::array<SwizzleSource, 4
 
 [[nodiscard]] bool CanBeAccelerated(const TextureCacheRuntime& runtime,
                                     const VideoCommon::ImageInfo& info) {
-    if (IsPixelFormatASTC(info.format)) {
-        return !runtime.HasNativeASTC() && Settings::values.accelerate_astc.GetValue();
+    if (IsPixelFormatASTC(info.format) && !runtime.HasNativeASTC()) {
+        return Settings::values.accelerate_astc.GetValue() &&
+               !Settings::values.async_astc.GetValue();
     }
     // Disable other accelerated uploads for now as they don't implement swizzled uploads
     return false;
@@ -258,6 +259,14 @@ void ApplySwizzle(GLuint handle, PixelFormat format, std::array<SwizzleSource, 4
     return format_info.compatibility_class == store_class;
 }
 
+// True only for ASTC images on a runtime without native ASTC support when async_astc is on.
+[[nodiscard]] bool CanBeDecodedAsync(const TextureCacheRuntime& runtime,
+                                     const VideoCommon::ImageInfo& info) {
+    const bool astc_without_native_support =
+        IsPixelFormatASTC(info.format) && !runtime.HasNativeASTC();
+    return astc_without_native_support && Settings::values.async_astc.GetValue();
+}
+
 [[nodiscard]] CopyOrigin MakeCopyOrigin(VideoCommon::Offset3D offset,
                                         VideoCommon::SubresourceLayers subresource, GLenum target) {
     switch (target) {
@@ -721,7 +730,9 @@ std::optional<size_t> TextureCacheRuntime::StagingBuffers::FindBuffer(size_t req
 Image::Image(TextureCacheRuntime& runtime_, const VideoCommon::ImageInfo& info_, GPUVAddr gpu_addr_,
              VAddr cpu_addr_)
     : VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_), runtime{&runtime_} {
-    if (CanBeAccelerated(*runtime, info)) {
+    if (CanBeDecodedAsync(*runtime, info)) {
+        flags |= ImageFlagBits::AsynchronousDecode;
+    } else if (CanBeAccelerated(*runtime, info)) {
         flags |= ImageFlagBits::AcceleratedUpload;
     }
     if (IsConverted(runtime->device, info.format, info.type)) {
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
index 9b85dfb5e..80adb70eb 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -1256,11 +1256,12 @@ Image::Image(TextureCacheRuntime& runtime_, const ImageInfo& info_, GPUVAddr gpu
       commit(runtime_.memory_allocator.Commit(original_image, MemoryUsage::DeviceLocal)),
       aspect_mask(ImageAspectMask(info.format)) {
     if (IsPixelFormatASTC(info.format) && !runtime->device.IsOptimalAstcSupported()) {
-        if (Settings::values.accelerate_astc.GetValue()) {
+        if (Settings::values.async_astc.GetValue()) {
+            flags |= VideoCommon::ImageFlagBits::AsynchronousDecode;
+        } else if (Settings::values.accelerate_astc.GetValue()) {
             flags |= VideoCommon::ImageFlagBits::AcceleratedUpload;
-        } else {
-            flags |= VideoCommon::ImageFlagBits::Converted;
         }
+        flags |= VideoCommon::ImageFlagBits::Converted;
         flags |= VideoCommon::ImageFlagBits::CostlyLoad;
     }
     if (runtime->device.HasDebuggingToolAttached()) {
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 4159bc796..9dd152fbe 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -1003,6 +1003,7 @@ u64 TextureCache<P>::GetScaledImageSizeBytes(const ImageBase& image) {
 template <class P>
 void TextureCache<P>::QueueAsyncDecode(Image& image, ImageId image_id) {
     UNIMPLEMENTED_IF(False(image.flags & ImageFlagBits::Converted));
+    LOG_INFO(HW_GPU, "Queuing async texture decode");
 
     image.flags |= ImageFlagBits::IsDecoding;
     auto decode = std::make_unique<AsyncDecodeContext>();
diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp
index e8d7c7863..4381eed1d 100644
--- a/src/video_core/textures/astc.cpp
+++ b/src/video_core/textures/astc.cpp
@@ -1656,8 +1656,8 @@ void Decompress(std::span<const uint8_t> data, uint32_t width, uint32_t height,
     const u32 rows = Common::DivideUp(height, block_height);
     const u32 cols = Common::DivideUp(width, block_width);
 
-    Common::ThreadWorker workers{std::max(std::thread::hardware_concurrency(), 2U) / 2,
-                                 "ASTCDecompress"};
+    static Common::ThreadWorker workers{std::max(std::thread::hardware_concurrency(), 2U) / 2,
+                                        "ASTCDecompress"};
 
     for (u32 z = 0; z < depth; ++z) {
         const u32 depth_offset = z * height * width * 4;
diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp
index db68ed259..dd1c1e94a 100644
--- a/src/yuzu/configuration/config.cpp
+++ b/src/yuzu/configuration/config.cpp
@@ -707,6 +707,7 @@ void Config::ReadRendererValues() {
     ReadGlobalSetting(Settings::values.use_asynchronous_gpu_emulation);
     ReadGlobalSetting(Settings::values.nvdec_emulation);
     ReadGlobalSetting(Settings::values.accelerate_astc);
+    ReadGlobalSetting(Settings::values.async_astc);
     ReadGlobalSetting(Settings::values.use_vsync);
     ReadGlobalSetting(Settings::values.shader_backend);
     ReadGlobalSetting(Settings::values.use_asynchronous_shaders);
@@ -1350,6 +1351,7 @@ void Config::SaveRendererValues() {
                  static_cast<u32>(Settings::values.nvdec_emulation.GetDefault()),
                  Settings::values.nvdec_emulation.UsingGlobal());
     WriteGlobalSetting(Settings::values.accelerate_astc);
+    WriteGlobalSetting(Settings::values.async_astc);
     WriteGlobalSetting(Settings::values.use_vsync);
     WriteSetting(QString::fromStdString(Settings::values.shader_backend.GetLabel()),
                  static_cast<u32>(Settings::values.shader_backend.GetValue(global)),
diff --git a/src/yuzu/configuration/configure_graphics_advanced.cpp b/src/yuzu/configuration/configure_graphics_advanced.cpp
index cc0155a2c..bbc363322 100644
--- a/src/yuzu/configuration/configure_graphics_advanced.cpp
+++ b/src/yuzu/configuration/configure_graphics_advanced.cpp
@@ -23,11 +23,13 @@ void ConfigureGraphicsAdvanced::SetConfiguration() {
     const bool runtime_lock = !system.IsPoweredOn();
     ui->use_vsync->setEnabled(runtime_lock);
     ui->renderer_force_max_clock->setEnabled(runtime_lock);
+    ui->async_astc->setEnabled(runtime_lock);
     ui->use_asynchronous_shaders->setEnabled(runtime_lock);
     ui->anisotropic_filtering_combobox->setEnabled(runtime_lock);
 
     ui->renderer_force_max_clock->setChecked(Settings::values.renderer_force_max_clock.GetValue());
     ui->use_vsync->setChecked(Settings::values.use_vsync.GetValue());
+    ui->async_astc->setChecked(Settings::values.async_astc.GetValue());
     ui->use_asynchronous_shaders->setChecked(Settings::values.use_asynchronous_shaders.GetValue());
     ui->use_fast_gpu_time->setChecked(Settings::values.use_fast_gpu_time.GetValue());
     ui->use_pessimistic_flushes->setChecked(Settings::values.use_pessimistic_flushes.GetValue());
@@ -60,6 +62,8 @@ void ConfigureGraphicsAdvanced::ApplyConfiguration() {
     ConfigurationShared::ApplyPerGameSetting(&Settings::values.max_anisotropy,
                                              ui->anisotropic_filtering_combobox);
     ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_vsync, ui->use_vsync, use_vsync);
+    ConfigurationShared::ApplyPerGameSetting(&Settings::values.async_astc, ui->async_astc,
+                                             async_astc);
     ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_asynchronous_shaders,
                                              ui->use_asynchronous_shaders,
                                              use_asynchronous_shaders);
@@ -91,6 +95,7 @@ void ConfigureGraphicsAdvanced::SetupPerGameUI() {
         ui->renderer_force_max_clock->setEnabled(
             Settings::values.renderer_force_max_clock.UsingGlobal());
         ui->use_vsync->setEnabled(Settings::values.use_vsync.UsingGlobal());
+        ui->async_astc->setEnabled(Settings::values.async_astc.UsingGlobal());
         ui->use_asynchronous_shaders->setEnabled(
             Settings::values.use_asynchronous_shaders.UsingGlobal());
         ui->use_fast_gpu_time->setEnabled(Settings::values.use_fast_gpu_time.UsingGlobal());
@@ -108,6 +113,8 @@ void ConfigureGraphicsAdvanced::SetupPerGameUI() {
                                             Settings::values.renderer_force_max_clock,
                                             renderer_force_max_clock);
     ConfigurationShared::SetColoredTristate(ui->use_vsync, Settings::values.use_vsync, use_vsync);
+    ConfigurationShared::SetColoredTristate(ui->async_astc, Settings::values.async_astc,
+                                            async_astc);
     ConfigurationShared::SetColoredTristate(ui->use_asynchronous_shaders,
                                             Settings::values.use_asynchronous_shaders,
                                             use_asynchronous_shaders);
diff --git a/src/yuzu/configuration/configure_graphics_advanced.h b/src/yuzu/configuration/configure_graphics_advanced.h
index df557d585..bf1b04749 100644
--- a/src/yuzu/configuration/configure_graphics_advanced.h
+++ b/src/yuzu/configuration/configure_graphics_advanced.h
@@ -38,6 +38,7 @@ private:
 
     ConfigurationShared::CheckState renderer_force_max_clock;
     ConfigurationShared::CheckState use_vsync;
+    ConfigurationShared::CheckState async_astc;
     ConfigurationShared::CheckState use_asynchronous_shaders;
     ConfigurationShared::CheckState use_fast_gpu_time;
     ConfigurationShared::CheckState use_pessimistic_flushes;
diff --git a/src/yuzu/configuration/configure_graphics_advanced.ui b/src/yuzu/configuration/configure_graphics_advanced.ui
index 061885e30..a7dbdc18c 100644
--- a/src/yuzu/configuration/configure_graphics_advanced.ui
+++ b/src/yuzu/configuration/configure_graphics_advanced.ui
@@ -89,6 +89,16 @@
           </property>
          </widget>
         </item>
+        <item>
+         <widget class="QCheckBox" name="async_astc">
+          <property name="toolTip">
+           <string>Enables asynchronous ASTC texture decoding, which may reduce load time stutter. This feature is experimental.</string>
+          </property>
+          <property name="text">
+           <string>Decode ASTC textures asynchronously (Hack)</string>
+          </property>
+         </widget>
+        </item>
         <item>
          <widget class="QCheckBox" name="use_asynchronous_shaders">
           <property name="toolTip">
diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp
index 3b6dce296..464da3231 100644
--- a/src/yuzu_cmd/config.cpp
+++ b/src/yuzu_cmd/config.cpp
@@ -324,6 +324,7 @@ void Config::ReadValues() {
     ReadSetting("Renderer", Settings::values.use_asynchronous_shaders);
     ReadSetting("Renderer", Settings::values.nvdec_emulation);
     ReadSetting("Renderer", Settings::values.accelerate_astc);
+    ReadSetting("Renderer", Settings::values.async_astc);
     ReadSetting("Renderer", Settings::values.use_fast_gpu_time);
     ReadSetting("Renderer", Settings::values.use_pessimistic_flushes);
     ReadSetting("Renderer", Settings::values.use_vulkan_driver_pipeline_cache);
diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h
index cf3cc4c4e..20e403400 100644
--- a/src/yuzu_cmd/default_ini.h
+++ b/src/yuzu_cmd/default_ini.h
@@ -342,6 +342,10 @@ nvdec_emulation =
 # 0: Off, 1 (default): On
 accelerate_astc =
 
+# Decode ASTC textures asynchronously.
+# 0 (default): Off, 1: On
+async_astc =
+
 # Turns on the speed limiter, which will limit the emulation speed to the desired speed limit value
 # 0: Off, 1: On (default)
 use_speed_limit =