From ea9ff71725113b8dbb159917c57aa536bba0cb53 Mon Sep 17 00:00:00 2001
From: Kelebek1 <eeeedddccc@hotmail.co.uk>
Date: Mon, 1 Aug 2022 02:58:13 +0100
Subject: [PATCH 1/2] Rework audio output, connecting AudioOut into coretiming
 to fix desync during heavy loads.

---
 src/audio_core/CMakeLists.txt                 |   1 +
 src/audio_core/audio_core.cpp                 |   8 -
 src/audio_core/audio_core.h                   |  16 -
 src/audio_core/device/audio_buffer.h          |   4 +
 src/audio_core/device/audio_buffers.h         |  13 +-
 src/audio_core/device/device_session.cpp      |  52 ++-
 src/audio_core/device/device_session.h        |  27 +-
 src/audio_core/in/audio_in_system.cpp         |  10 +-
 src/audio_core/out/audio_out_system.cpp       |  10 +-
 .../renderer/adsp/audio_renderer.cpp          |   9 +-
 .../renderer/behavior/behavior_info.cpp       |  12 +-
 .../renderer/command/sink/device.cpp          |   4 +
 src/audio_core/renderer/system_manager.cpp    |  35 +-
 src/audio_core/sink/cubeb_sink.cpp            | 349 ++---------------
 src/audio_core/sink/cubeb_sink.h              |   2 +-
 src/audio_core/sink/null_sink.h               |  47 ++-
 src/audio_core/sink/sdl2_sink.cpp             | 350 ++----------------
 src/audio_core/sink/sdl2_sink.h               |   2 +-
 src/audio_core/sink/sink.h                    |   2 +-
 src/audio_core/sink/sink_details.cpp          |   6 +-
 src/audio_core/sink/sink_stream.cpp           | 259 +++++++++++++
 src/audio_core/sink/sink_stream.h             | 171 +++++----
 src/core/hle/result.h                         |   2 +
 23 files changed, 550 insertions(+), 841 deletions(-)
 create mode 100644 src/audio_core/sink/sink_stream.cpp

diff --git a/src/audio_core/CMakeLists.txt b/src/audio_core/CMakeLists.txt
index 5fe1d5fa5..144f1bab2 100644
--- a/src/audio_core/CMakeLists.txt
+++ b/src/audio_core/CMakeLists.txt
@@ -194,6 +194,7 @@ add_library(audio_core STATIC
     sink/sink.h
     sink/sink_details.cpp
     sink/sink_details.h
+    sink/sink_stream.cpp
     sink/sink_stream.h
 )
 
diff --git a/src/audio_core/audio_core.cpp b/src/audio_core/audio_core.cpp
index 78e615a10..cf7e763e6 100644
--- a/src/audio_core/audio_core.cpp
+++ b/src/audio_core/audio_core.cpp
@@ -57,12 +57,4 @@ void AudioCore::PauseSinks(const bool pausing) const {
     }
 }
 
-u32 AudioCore::GetStreamQueue() const {
-    return estimated_queue.load();
-}
-
-void AudioCore::SetStreamQueue(u32 size) {
-    estimated_queue.store(size);
-}
-
 } // namespace AudioCore
diff --git a/src/audio_core/audio_core.h b/src/audio_core/audio_core.h
index 0f7d61ee4..fd1e43356 100644
--- a/src/audio_core/audio_core.h
+++ b/src/audio_core/audio_core.h
@@ -65,20 +65,6 @@ public:
      */
     void PauseSinks(bool pausing) const;
 
-    /**
-     * Get the size of the current stream queue.
-     *
-     * @return Current stream queue size.
-     */
-    u32 GetStreamQueue() const;
-
-    /**
-     * Get the size of the current stream queue.
-     *
-     * @param size - New stream size.
-     */
-    void SetStreamQueue(u32 size);
-
 private:
     /**
      * Create the sinks on startup.
@@ -93,8 +79,6 @@ private:
     std::unique_ptr<Sink::Sink> input_sink;
     /// The ADSP in the sysmodule
     std::unique_ptr<AudioRenderer::ADSP::ADSP> adsp;
-    /// Current size of the stream queue
-    std::atomic<u32> estimated_queue{0};
 };
 
 } // namespace AudioCore
diff --git a/src/audio_core/device/audio_buffer.h b/src/audio_core/device/audio_buffer.h
index cae7fa970..7128ef72a 100644
--- a/src/audio_core/device/audio_buffer.h
+++ b/src/audio_core/device/audio_buffer.h
@@ -8,6 +8,10 @@
 namespace AudioCore {
 
 struct AudioBuffer {
+    /// Timestamp this buffer started playing.
+    u64 start_timestamp;
+    /// Timestamp this buffer should finish playing.
+    u64 end_timestamp;
     /// Timestamp this buffer completed playing.
     s64 played_timestamp;
     /// Game memory address for these samples.
diff --git a/src/audio_core/device/audio_buffers.h b/src/audio_core/device/audio_buffers.h
index 5d1979ea0..57c78d439 100644
--- a/src/audio_core/device/audio_buffers.h
+++ b/src/audio_core/device/audio_buffers.h
@@ -58,6 +58,7 @@ public:
             if (index < 0) {
                 index += N;
             }
+
             out_buffers.push_back(buffers[index]);
             registered_count++;
             registered_index = (registered_index + 1) % append_limit;
@@ -100,7 +101,7 @@ public:
             }
 
             // Check with the backend if this buffer can be released yet.
-            if (!session.IsBufferConsumed(buffers[index].tag)) {
+            if (!session.IsBufferConsumed(buffers[index])) {
                 break;
             }
 
@@ -280,6 +281,16 @@ public:
         return true;
     }
 
+    u64 GetNextTimestamp() const {
+        // Take the most recent buffer's end: the slot just before appended_index (wrapping)
+        std::scoped_lock l{lock};
+        auto index{appended_index - 1};
+        if (index < 0) {
+            index += append_limit;
+        }
+        return buffers[index].end_timestamp;
+    }
+
 private:
     /// Buffer lock
     mutable std::recursive_mutex lock{};
diff --git a/src/audio_core/device/device_session.cpp b/src/audio_core/device/device_session.cpp
index 095fc96ce..c71c3a376 100644
--- a/src/audio_core/device/device_session.cpp
+++ b/src/audio_core/device/device_session.cpp
@@ -7,11 +7,20 @@
 #include "audio_core/device/device_session.h"
 #include "audio_core/sink/sink_stream.h"
 #include "core/core.h"
+#include "core/core_timing.h"
 #include "core/memory.h"
 
 namespace AudioCore {
 
-DeviceSession::DeviceSession(Core::System& system_) : system{system_} {}
+using namespace std::literals;
+constexpr auto INCREMENT_TIME{5ms};
+
+DeviceSession::DeviceSession(Core::System& system_)
+    : system{system_}, thread_event{Core::Timing::CreateEvent(
+                           "AudioOutSampleTick",
+                           [this](std::uintptr_t, s64 time, std::chrono::nanoseconds) {
+                               return ThreadFunc();
+                           })} {}
 
 DeviceSession::~DeviceSession() {
     Finalize();
@@ -50,20 +59,21 @@ void DeviceSession::Finalize() {
 }
 
 void DeviceSession::Start() {
-    stream->SetPlayedSampleCount(played_sample_count);
-    stream->Start();
+    if (stream) {
+        stream->Start();
+        system.CoreTiming().ScheduleLoopingEvent(std::chrono::nanoseconds::zero(), INCREMENT_TIME,
+                                                 thread_event);
+    }
 }
 
 void DeviceSession::Stop() {
     if (stream) {
-        played_sample_count = stream->GetPlayedSampleCount();
         stream->Stop();
+        system.CoreTiming().UnscheduleEvent(thread_event, {});
     }
 }
 
 void DeviceSession::AppendBuffers(std::span<AudioBuffer> buffers) const {
-    auto& memory{system.Memory()};
-
     for (size_t i = 0; i < buffers.size(); i++) {
         Sink::SinkBuffer new_buffer{
             .frames = buffers[i].size / (channel_count * sizeof(s16)),
@@ -77,7 +87,7 @@ void DeviceSession::AppendBuffers(std::span<AudioBuffer> buffers) const {
             stream->AppendBuffer(new_buffer, samples);
         } else {
             std::vector<s16> samples(buffers[i].size / sizeof(s16));
-            memory.ReadBlockUnsafe(buffers[i].samples, samples.data(), buffers[i].size);
+            system.Memory().ReadBlockUnsafe(buffers[i].samples, samples.data(), buffers[i].size);
             stream->AppendBuffer(new_buffer, samples);
         }
     }
@@ -85,17 +95,13 @@ void DeviceSession::AppendBuffers(std::span<AudioBuffer> buffers) const {
 
 void DeviceSession::ReleaseBuffer(AudioBuffer& buffer) const {
     if (type == Sink::StreamType::In) {
-        auto& memory{system.Memory()};
         auto samples{stream->ReleaseBuffer(buffer.size / sizeof(s16))};
-        memory.WriteBlockUnsafe(buffer.samples, samples.data(), buffer.size);
+        system.Memory().WriteBlockUnsafe(buffer.samples, samples.data(), buffer.size);
     }
 }
 
-bool DeviceSession::IsBufferConsumed(u64 tag) const {
-    if (stream) {
-        return stream->IsBufferConsumed(tag);
-    }
-    return true;
+bool DeviceSession::IsBufferConsumed(AudioBuffer& buffer) const {
+    return played_sample_count >= buffer.end_timestamp;
 }
 
 void DeviceSession::SetVolume(f32 volume) const {
@@ -105,10 +111,22 @@ void DeviceSession::SetVolume(f32 volume) const {
 }
 
 u64 DeviceSession::GetPlayedSampleCount() const {
-    if (stream) {
-        return stream->GetPlayedSampleCount();
+    return played_sample_count;
+}
+
+std::optional<std::chrono::nanoseconds> DeviceSession::ThreadFunc() {
+    // Add 5ms of samples at a 48K sample rate.
+    played_sample_count += 48'000 * INCREMENT_TIME / 1s;
+    if (type == Sink::StreamType::Out) {
+        system.AudioCore().GetAudioManager().SetEvent(Event::Type::AudioOutManager, true);
+    } else {
+        system.AudioCore().GetAudioManager().SetEvent(Event::Type::AudioInManager, true);
     }
-    return 0;
+    return std::nullopt;
+}
+
+void DeviceSession::SetRingSize(u32 ring_size) {
+    stream->SetRingSize(ring_size);
 }
 
 } // namespace AudioCore
diff --git a/src/audio_core/device/device_session.h b/src/audio_core/device/device_session.h
index 4a031b765..3414e2c06 100644
--- a/src/audio_core/device/device_session.h
+++ b/src/audio_core/device/device_session.h
@@ -3,6 +3,11 @@
 
 #pragma once
 
+#include <atomic>
+#include <chrono>
+#include <memory>
+#include <optional>
 #include <span>
+#include <vector>
 
 #include "audio_core/common/common.h"
@@ -11,9 +14,13 @@
 
 namespace Core {
 class System;
-}
+namespace Timing {
+struct EventType;
+} // namespace Timing
+} // namespace Core
 
 namespace AudioCore {
+
 namespace Sink {
 class SinkStream;
 struct SinkBuffer;
@@ -70,7 +77,7 @@ public:
-     * @param tag - Unqiue tag of the buffer to check.
+     * @param buffer - Buffer to check; consumed once its end timestamp has been played.
      * @return true if the buffer has been consumed, otherwise false.
      */
-    bool IsBufferConsumed(u64 tag) const;
+    bool IsBufferConsumed(AudioBuffer& buffer) const;
 
     /**
      * Start this device session, starting the backend stream.
@@ -96,6 +103,16 @@ public:
      */
     u64 GetPlayedSampleCount() const;
 
+    /**
+     * CoreTiming callback to increment played_sample_count over time.
+     */
+    std::optional<std::chrono::nanoseconds> ThreadFunc();
+
+    /**
+     * Set the size of the ring buffer.
+     */
+    void SetRingSize(u32 ring_size);
+
 private:
     /// System
     Core::System& system;
@@ -118,9 +135,13 @@ private:
     /// Applet resource user id of this device session
     u64 applet_resource_user_id{};
     /// Total number of samples played by this device session
-    u64 played_sample_count{};
+    std::atomic<u64> played_sample_count{};
+    /// Event increasing the played sample count every 5ms
+    std::shared_ptr<Core::Timing::EventType> thread_event;
     /// Is this session initialised?
     bool initialized{};
+    /// Buffer queue
+    std::vector<AudioBuffer> buffer_queue{};
 };
 
 } // namespace AudioCore
diff --git a/src/audio_core/in/audio_in_system.cpp b/src/audio_core/in/audio_in_system.cpp
index ec5d37ed4..7e80ba03c 100644
--- a/src/audio_core/in/audio_in_system.cpp
+++ b/src/audio_core/in/audio_in_system.cpp
@@ -93,6 +93,7 @@ Result System::Start() {
     std::vector<AudioBuffer> buffers_to_flush{};
     buffers.RegisterBuffers(buffers_to_flush);
     session->AppendBuffers(buffers_to_flush);
+    session->SetRingSize(static_cast<u32>(buffers_to_flush.size()));
 
     return ResultSuccess;
 }
@@ -112,8 +113,13 @@ bool System::AppendBuffer(const AudioInBuffer& buffer, const u64 tag) {
         return false;
     }
 
-    AudioBuffer new_buffer{
-        .played_timestamp = 0, .samples = buffer.samples, .tag = tag, .size = buffer.size};
+    const auto timestamp{buffers.GetNextTimestamp()};
+    AudioBuffer new_buffer{.start_timestamp = timestamp,
+                           .end_timestamp = timestamp + buffer.size / (channel_count * sizeof(s16)),
+                           .played_timestamp = 0,
+                           .samples = buffer.samples,
+                           .tag = tag,
+                           .size = buffer.size};
 
     buffers.AppendBuffer(new_buffer);
     RegisterBuffers();
diff --git a/src/audio_core/out/audio_out_system.cpp b/src/audio_core/out/audio_out_system.cpp
index 35afddf06..8941b09a0 100644
--- a/src/audio_core/out/audio_out_system.cpp
+++ b/src/audio_core/out/audio_out_system.cpp
@@ -92,6 +92,7 @@ Result System::Start() {
     std::vector<AudioBuffer> buffers_to_flush{};
     buffers.RegisterBuffers(buffers_to_flush);
     session->AppendBuffers(buffers_to_flush);
+    session->SetRingSize(static_cast<u32>(buffers_to_flush.size()));
 
     return ResultSuccess;
 }
@@ -111,8 +112,13 @@ bool System::AppendBuffer(const AudioOutBuffer& buffer, u64 tag) {
         return false;
     }
 
-    AudioBuffer new_buffer{
-        .played_timestamp = 0, .samples = buffer.samples, .tag = tag, .size = buffer.size};
+    const auto timestamp{buffers.GetNextTimestamp()};
+    AudioBuffer new_buffer{.start_timestamp = timestamp,
+                           .end_timestamp = timestamp + buffer.size / (channel_count * sizeof(s16)),
+                           .played_timestamp = 0,
+                           .samples = buffer.samples,
+                           .tag = tag,
+                           .size = buffer.size};
 
     buffers.AppendBuffer(new_buffer);
     RegisterBuffers();
diff --git a/src/audio_core/renderer/adsp/audio_renderer.cpp b/src/audio_core/renderer/adsp/audio_renderer.cpp
index 3967ccfe6..bcd889ecb 100644
--- a/src/audio_core/renderer/adsp/audio_renderer.cpp
+++ b/src/audio_core/renderer/adsp/audio_renderer.cpp
@@ -106,9 +106,6 @@ void AudioRenderer::Start(AudioRenderer_Mailbox* mailbox_) {
 
     mailbox = mailbox_;
     thread = std::thread(&AudioRenderer::ThreadFunc, this);
-    for (auto& stream : streams) {
-        stream->Start();
-    }
     running = true;
 }
 
@@ -130,6 +127,7 @@ void AudioRenderer::CreateSinkStreams() {
         std::string name{fmt::format("ADSP_RenderStream-{}", i)};
         streams[i] =
             sink.AcquireSinkStream(system, channels, name, ::AudioCore::Sink::StreamType::Render);
+        streams[i]->SetRingSize(4);
     }
 }
 
@@ -198,11 +196,6 @@ void AudioRenderer::ThreadFunc() {
                             command_list_processor.Process(index) - start_time;
                     }
 
-                    if (index == 0) {
-                        auto stream{command_list_processor.GetOutputSinkStream()};
-                        system.AudioCore().SetStreamQueue(stream->GetQueueSize());
-                    }
-
                     const auto end_time{system.CoreTiming().GetClockTicks()};
 
                     command_buffer.remaining_command_count =
diff --git a/src/audio_core/renderer/behavior/behavior_info.cpp b/src/audio_core/renderer/behavior/behavior_info.cpp
index c5d4d66d8..92140aaea 100644
--- a/src/audio_core/renderer/behavior/behavior_info.cpp
+++ b/src/audio_core/renderer/behavior/behavior_info.cpp
@@ -43,13 +43,15 @@ void BehaviorInfo::AppendError(ErrorInfo& error) {
 }
 
 void BehaviorInfo::CopyErrorInfo(std::span<ErrorInfo> out_errors, u32& out_count) {
-    auto error_count_{std::min(error_count, MaxErrors)};
-    std::memset(out_errors.data(), 0, MaxErrors * sizeof(ErrorInfo));
+    out_count = std::min(error_count, MaxErrors);
 
-    for (size_t i = 0; i < error_count_; i++) {
-        out_errors[i] = errors[i];
+    for (size_t i = 0; i < MaxErrors; i++) {
+        if (i < out_count) {
+            out_errors[i] = errors[i];
+        } else {
+            out_errors[i] = {};
+        }
     }
-    out_count = error_count_;
 }
 
 void BehaviorInfo::UpdateFlags(const Flags flags_) {
diff --git a/src/audio_core/renderer/command/sink/device.cpp b/src/audio_core/renderer/command/sink/device.cpp
index 47e0c6722..e88372a75 100644
--- a/src/audio_core/renderer/command/sink/device.cpp
+++ b/src/audio_core/renderer/command/sink/device.cpp
@@ -46,6 +46,10 @@ void DeviceSinkCommand::Process(const ADSP::CommandListProcessor& processor) {
 
     out_buffer.tag = reinterpret_cast<u64>(samples.data());
     stream->AppendBuffer(out_buffer, samples);
+
+    if (stream->IsPaused()) {
+        stream->Start();
+    }
 }
 
 bool DeviceSinkCommand::Verify(const ADSP::CommandListProcessor& processor) {
diff --git a/src/audio_core/renderer/system_manager.cpp b/src/audio_core/renderer/system_manager.cpp
index b326819ed..bc2dd9e6e 100644
--- a/src/audio_core/renderer/system_manager.cpp
+++ b/src/audio_core/renderer/system_manager.cpp
@@ -15,8 +15,7 @@ MICROPROFILE_DEFINE(Audio_RenderSystemManager, "Audio", "Render System Manager",
                     MP_RGB(60, 19, 97));
 
 namespace AudioCore::AudioRenderer {
-constexpr std::chrono::nanoseconds BaseRenderTime{5'000'000UL};
-constexpr std::chrono::nanoseconds RenderTimeOffset{400'000UL};
+constexpr std::chrono::nanoseconds RENDER_TIME{5'000'000UL};
 
 SystemManager::SystemManager(Core::System& core_)
     : core{core_}, adsp{core.AudioCore().GetADSP()}, mailbox{adsp.GetRenderMailbox()},
@@ -36,8 +35,8 @@ bool SystemManager::InitializeUnsafe() {
         if (adsp.Start()) {
             active = true;
             thread = std::jthread([this](std::stop_token stop_token) { ThreadFunc(); });
-            core.CoreTiming().ScheduleLoopingEvent(std::chrono::nanoseconds(0),
-                                                   BaseRenderTime - RenderTimeOffset, thread_event);
+            core.CoreTiming().ScheduleLoopingEvent(std::chrono::nanoseconds(0), RENDER_TIME,
+                                                   thread_event);
         }
     }
 
@@ -121,35 +120,9 @@ void SystemManager::ThreadFunc() {
 }
 
 std::optional<std::chrono::nanoseconds> SystemManager::ThreadFunc2(s64 time) {
-    std::optional<std::chrono::nanoseconds> new_schedule_time{std::nullopt};
-    const auto queue_size{core.AudioCore().GetStreamQueue()};
-    switch (state) {
-    case StreamState::Filling:
-        if (queue_size >= 5) {
-            new_schedule_time = BaseRenderTime;
-            state = StreamState::Steady;
-        }
-        break;
-    case StreamState::Steady:
-        if (queue_size <= 2) {
-            new_schedule_time = BaseRenderTime - RenderTimeOffset;
-            state = StreamState::Filling;
-        } else if (queue_size > 5) {
-            new_schedule_time = BaseRenderTime + RenderTimeOffset;
-            state = StreamState::Draining;
-        }
-        break;
-    case StreamState::Draining:
-        if (queue_size <= 5) {
-            new_schedule_time = BaseRenderTime;
-            state = StreamState::Steady;
-        }
-        break;
-    }
-
     update.store(true);
     update.notify_all();
-    return new_schedule_time;
+    return std::nullopt;
 }
 
 void SystemManager::PauseCallback(bool paused) {
diff --git a/src/audio_core/sink/cubeb_sink.cpp b/src/audio_core/sink/cubeb_sink.cpp
index 90d049e8e..9ae043611 100644
--- a/src/audio_core/sink/cubeb_sink.cpp
+++ b/src/audio_core/sink/cubeb_sink.cpp
@@ -1,21 +1,13 @@
 // SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later
 
-#include <algorithm>
-#include <atomic>
 #include <span>
+#include <vector>
 
-#include "audio_core/audio_core.h"
-#include "audio_core/audio_event.h"
-#include "audio_core/audio_manager.h"
+#include "audio_core/common/common.h"
 #include "audio_core/sink/cubeb_sink.h"
 #include "audio_core/sink/sink_stream.h"
-#include "common/assert.h"
-#include "common/fixed_point.h"
 #include "common/logging/log.h"
-#include "common/reader_writer_queue.h"
-#include "common/ring_buffer.h"
-#include "common/settings.h"
 #include "core/core.h"
 
 #ifdef _WIN32
@@ -42,10 +34,10 @@ public:
      * @param system_          - Core system.
      * @param event            - Event used only for audio renderer, signalled on buffer consume.
      */
-    CubebSinkStream(cubeb* ctx_, const u32 device_channels_, const u32 system_channels_,
+    CubebSinkStream(cubeb* ctx_, u32 device_channels_, u32 system_channels_,
                     cubeb_devid output_device, cubeb_devid input_device, const std::string& name_,
-                    const StreamType type_, Core::System& system_)
-        : ctx{ctx_}, type{type_}, system{system_} {
+                    StreamType type_, Core::System& system_)
+        : SinkStream(system_, type_), ctx{ctx_} {
 #ifdef _WIN32
         CoInitializeEx(nullptr, COINIT_MULTITHREADED);
 #endif
@@ -79,12 +71,10 @@ public:
 
         minimum_latency = std::max(minimum_latency, 256u);
 
-        playing_buffer.consumed = true;
-
-        LOG_DEBUG(Service_Audio,
-                  "Opening cubeb stream {} type {} with: rate {} channels {} (system channels {}) "
-                  "latency {}",
-                  name, type, params.rate, params.channels, system_channels, minimum_latency);
+        LOG_INFO(Service_Audio,
+                 "Opening cubeb stream {} type {} with: rate {} channels {} (system channels {}) "
+                 "latency {}",
+                 name, type, params.rate, params.channels, system_channels, minimum_latency);
 
         auto init_error{0};
         if (type == StreamType::In) {
@@ -111,6 +101,8 @@ public:
     ~CubebSinkStream() override {
         LOG_DEBUG(Service_Audio, "Destructing cubeb stream {}", name);
 
+        Unstall();
+
         if (!ctx) {
             return;
         }
@@ -136,7 +128,7 @@ public:
      * @param resume - Set to true if this is resuming the stream a previously-active stream.
      *                 Default false.
      */
-    void Start(const bool resume = false) override {
+    void Start(bool resume = false) override {
         if (!ctx) {
             return;
         }
@@ -158,6 +150,7 @@ public:
      * Stop the sink stream.
      */
     void Stop() override {
+        Unstall();
         if (!ctx) {
             return;
         }
@@ -170,194 +163,7 @@ public:
         paused = true;
     }
 
-    /**
-     * Append a new buffer and its samples to a waiting queue to play.
-     *
-     * @param buffer  - Audio buffer information to be queued.
-     * @param samples - The s16 samples to be queue for playback.
-     */
-    void AppendBuffer(::AudioCore::Sink::SinkBuffer& buffer, std::vector<s16>& samples) override {
-        if (type == StreamType::In) {
-            queue.enqueue(buffer);
-            queued_buffers++;
-        } else {
-            constexpr s32 min{std::numeric_limits<s16>::min()};
-            constexpr s32 max{std::numeric_limits<s16>::max()};
-
-            auto yuzu_volume{Settings::Volume()};
-            if (yuzu_volume > 1.0f) {
-                yuzu_volume = 0.6f + 20 * std::log10(yuzu_volume);
-            }
-            auto volume{system_volume * device_volume * yuzu_volume};
-
-            if (system_channels == 6 && device_channels == 2) {
-                // We're given 6 channels, but our device only outputs 2, so downmix.
-                constexpr std::array<f32, 4> down_mix_coeff{1.0f, 0.707f, 0.251f, 0.707f};
-
-                for (u32 read_index = 0, write_index = 0; read_index < samples.size();
-                     read_index += system_channels, write_index += device_channels) {
-                    const auto left_sample{
-                        ((Common::FixedPoint<49, 15>(
-                              samples[read_index + static_cast<u32>(Channels::FrontLeft)]) *
-                              down_mix_coeff[0] +
-                          samples[read_index + static_cast<u32>(Channels::Center)] *
-                              down_mix_coeff[1] +
-                          samples[read_index + static_cast<u32>(Channels::LFE)] *
-                              down_mix_coeff[2] +
-                          samples[read_index + static_cast<u32>(Channels::BackLeft)] *
-                              down_mix_coeff[3]) *
-                         volume)
-                            .to_int()};
-
-                    const auto right_sample{
-                        ((Common::FixedPoint<49, 15>(
-                              samples[read_index + static_cast<u32>(Channels::FrontRight)]) *
-                              down_mix_coeff[0] +
-                          samples[read_index + static_cast<u32>(Channels::Center)] *
-                              down_mix_coeff[1] +
-                          samples[read_index + static_cast<u32>(Channels::LFE)] *
-                              down_mix_coeff[2] +
-                          samples[read_index + static_cast<u32>(Channels::BackRight)] *
-                              down_mix_coeff[3]) *
-                         volume)
-                            .to_int()};
-
-                    samples[write_index + static_cast<u32>(Channels::FrontLeft)] =
-                        static_cast<s16>(std::clamp(left_sample, min, max));
-                    samples[write_index + static_cast<u32>(Channels::FrontRight)] =
-                        static_cast<s16>(std::clamp(right_sample, min, max));
-                }
-
-                samples.resize(samples.size() / system_channels * device_channels);
-
-            } else if (system_channels == 2 && device_channels == 6) {
-                // We need moar samples! Not all games will provide 6 channel audio.
-                // TODO: Implement some upmixing here. Currently just passthrough, with other
-                // channels left as silence.
-                std::vector<s16> new_samples(samples.size() / system_channels * device_channels, 0);
-
-                for (u32 read_index = 0, write_index = 0; read_index < samples.size();
-                     read_index += system_channels, write_index += device_channels) {
-                    const auto left_sample{static_cast<s16>(std::clamp(
-                        static_cast<s32>(
-                            static_cast<f32>(
-                                samples[read_index + static_cast<u32>(Channels::FrontLeft)]) *
-                            volume),
-                        min, max))};
-
-                    new_samples[write_index + static_cast<u32>(Channels::FrontLeft)] = left_sample;
-
-                    const auto right_sample{static_cast<s16>(std::clamp(
-                        static_cast<s32>(
-                            static_cast<f32>(
-                                samples[read_index + static_cast<u32>(Channels::FrontRight)]) *
-                            volume),
-                        min, max))};
-
-                    new_samples[write_index + static_cast<u32>(Channels::FrontRight)] =
-                        right_sample;
-                }
-                samples = std::move(new_samples);
-
-            } else if (volume != 1.0f) {
-                for (u32 i = 0; i < samples.size(); i++) {
-                    samples[i] = static_cast<s16>(std::clamp(
-                        static_cast<s32>(static_cast<f32>(samples[i]) * volume), min, max));
-                }
-            }
-
-            samples_buffer.Push(samples);
-            queue.enqueue(buffer);
-            queued_buffers++;
-        }
-    }
-
-    /**
-     * Release a buffer. Audio In only, will fill a buffer with recorded samples.
-     *
-     * @param num_samples - Maximum number of samples to receive.
-     * @return Vector of recorded samples. May have fewer than num_samples.
-     */
-    std::vector<s16> ReleaseBuffer(const u64 num_samples) override {
-        static constexpr s32 min = std::numeric_limits<s16>::min();
-        static constexpr s32 max = std::numeric_limits<s16>::max();
-
-        auto samples{samples_buffer.Pop(num_samples)};
-
-        // TODO: Up-mix to 6 channels if the game expects it.
-        // For audio input this is unlikely to ever be the case though.
-
-        // Incoming mic volume seems to always be very quiet, so multiply by an additional 8 here.
-        // TODO: Play with this and find something that works better.
-        auto volume{system_volume * device_volume * 8};
-        for (u32 i = 0; i < samples.size(); i++) {
-            samples[i] = static_cast<s16>(
-                std::clamp(static_cast<s32>(static_cast<f32>(samples[i]) * volume), min, max));
-        }
-
-        if (samples.size() < num_samples) {
-            samples.resize(num_samples, 0);
-        }
-        return samples;
-    }
-
-    /**
-     * Check if a certain buffer has been consumed (fully played).
-     *
-     * @param tag - Unique tag of a buffer to check for.
-     * @return True if the buffer has been played, otherwise false.
-     */
-    bool IsBufferConsumed(const u64 tag) override {
-        if (released_buffer.tag == 0) {
-            if (!released_buffers.try_dequeue(released_buffer)) {
-                return false;
-            }
-        }
-
-        if (released_buffer.tag == tag) {
-            released_buffer.tag = 0;
-            return true;
-        }
-        return false;
-    }
-
-    /**
-     * Empty out the buffer queue.
-     */
-    void ClearQueue() override {
-        samples_buffer.Pop();
-        while (queue.pop()) {
-        }
-        while (released_buffers.pop()) {
-        }
-        queued_buffers = 0;
-        released_buffer = {};
-        playing_buffer = {};
-        playing_buffer.consumed = true;
-    }
-
 private:
-    /**
-     * Signal events back to the audio system that a buffer was played/can be filled.
-     *
-     * @param buffer - Consumed audio buffer to be released.
-     */
-    void SignalEvent(const ::AudioCore::Sink::SinkBuffer& buffer) {
-        auto& manager{system.AudioCore().GetAudioManager()};
-        switch (type) {
-        case StreamType::Out:
-            released_buffers.enqueue(buffer);
-            manager.SetEvent(Event::Type::AudioOutManager, true);
-            break;
-        case StreamType::In:
-            released_buffers.enqueue(buffer);
-            manager.SetEvent(Event::Type::AudioInManager, true);
-            break;
-        case StreamType::Render:
-            break;
-        }
-    }
-
     /**
      * Main callback from Cubeb. Either expects samples from us (audio render/audio out), or will
      * provide samples to be copied (audio in).
@@ -378,106 +184,15 @@ private:
 
         const std::size_t num_channels = impl->GetDeviceChannels();
         const std::size_t frame_size = num_channels;
-        const std::size_t frame_size_bytes = frame_size * sizeof(s16);
         const std::size_t num_frames{static_cast<size_t>(num_frames_)};
-        size_t frames_written{0};
-        [[maybe_unused]] bool underrun{false};
 
         if (impl->type == StreamType::In) {
-            // INPUT
             std::span<const s16> input_buffer{reinterpret_cast<const s16*>(in_buff),
                                               num_frames * frame_size};
-
-            while (frames_written < num_frames) {
-                auto& playing_buffer{impl->playing_buffer};
-
-                // If the playing buffer has been consumed or has no frames, we need a new one
-                if (playing_buffer.consumed || playing_buffer.frames == 0) {
-                    if (!impl->queue.try_dequeue(impl->playing_buffer)) {
-                        // If no buffer was available we've underrun, just push the samples and
-                        // continue.
-                        underrun = true;
-                        impl->samples_buffer.Push(&input_buffer[frames_written * frame_size],
-                                                  (num_frames - frames_written) * frame_size);
-                        frames_written = num_frames;
-                        continue;
-                    } else {
-                        // Successfully got a new buffer, mark the old one as consumed and signal.
-                        impl->queued_buffers--;
-                        impl->SignalEvent(impl->playing_buffer);
-                    }
-                }
-
-                // Get the minimum frames available between the currently playing buffer, and the
-                // amount we have left to fill
-                size_t frames_available{
-                    std::min(playing_buffer.frames - playing_buffer.frames_played,
-                             num_frames - frames_written)};
-
-                impl->samples_buffer.Push(&input_buffer[frames_written * frame_size],
-                                          frames_available * frame_size);
-
-                frames_written += frames_available;
-                playing_buffer.frames_played += frames_available;
-
-                // If that's all the frames in the current buffer, add its samples and mark it as
-                // consumed
-                if (playing_buffer.frames_played >= playing_buffer.frames) {
-                    impl->AddPlayedSampleCount(playing_buffer.frames_played * num_channels);
-                    impl->playing_buffer.consumed = true;
-                }
-            }
-
-            std::memcpy(&impl->last_frame[0], &input_buffer[(frames_written - 1) * frame_size],
-                        frame_size_bytes);
+            impl->ProcessAudioIn(input_buffer, num_frames);
         } else {
-            // OUTPUT
             std::span<s16> output_buffer{reinterpret_cast<s16*>(out_buff), num_frames * frame_size};
-
-            while (frames_written < num_frames) {
-                auto& playing_buffer{impl->playing_buffer};
-
-                // If the playing buffer has been consumed or has no frames, we need a new one
-                if (playing_buffer.consumed || playing_buffer.frames == 0) {
-                    if (!impl->queue.try_dequeue(impl->playing_buffer)) {
-                        // If no buffer was available we've underrun, fill the remaining buffer with
-                        // the last written frame and continue.
-                        underrun = true;
-                        for (size_t i = frames_written; i < num_frames; i++) {
-                            std::memcpy(&output_buffer[i * frame_size], &impl->last_frame[0],
-                                        frame_size_bytes);
-                        }
-                        frames_written = num_frames;
-                        continue;
-                    } else {
-                        // Successfully got a new buffer, mark the old one as consumed and signal.
-                        impl->queued_buffers--;
-                        impl->SignalEvent(impl->playing_buffer);
-                    }
-                }
-
-                // Get the minimum frames available between the currently playing buffer, and the
-                // amount we have left to fill
-                size_t frames_available{
-                    std::min(playing_buffer.frames - playing_buffer.frames_played,
-                             num_frames - frames_written)};
-
-                impl->samples_buffer.Pop(&output_buffer[frames_written * frame_size],
-                                         frames_available * frame_size);
-
-                frames_written += frames_available;
-                playing_buffer.frames_played += frames_available;
-
-                // If that's all the frames in the current buffer, add its samples and mark it as
-                // consumed
-                if (playing_buffer.frames_played >= playing_buffer.frames) {
-                    impl->AddPlayedSampleCount(playing_buffer.frames_played * num_channels);
-                    impl->playing_buffer.consumed = true;
-                }
-            }
-
-            std::memcpy(&impl->last_frame[0], &output_buffer[(frames_written - 1) * frame_size],
-                        frame_size_bytes);
+            impl->ProcessAudioOutAndRender(output_buffer, num_frames);
         }
 
         return num_frames_;
@@ -490,32 +205,12 @@ private:
      * @param user_data   - Custom data pointer passed along, points to a CubebSinkStream.
      * @param state       - New state of the device.
      */
-    static void StateCallback([[maybe_unused]] cubeb_stream* stream,
-                              [[maybe_unused]] void* user_data,
-                              [[maybe_unused]] cubeb_state state) {}
+    static void StateCallback(cubeb_stream*, void*, cubeb_state) {}
 
     /// Main Cubeb context
     cubeb* ctx{};
     /// Cubeb stream backend
     cubeb_stream* stream_backend{};
-    /// Name of this stream
-    std::string name{};
-    /// Type of this stream
-    StreamType type;
-    /// Core system
-    Core::System& system;
-    /// Ring buffer of the samples waiting to be played or consumed
-    Common::RingBuffer<s16, 0x10000> samples_buffer;
-    /// Audio buffers queued and waiting to play
-    Common::ReaderWriterQueue<::AudioCore::Sink::SinkBuffer> queue;
-    /// The currently-playing audio buffer
-    ::AudioCore::Sink::SinkBuffer playing_buffer{};
-    /// Audio buffers which have been played and are in queue to be released by the audio system
-    Common::ReaderWriterQueue<::AudioCore::Sink::SinkBuffer> released_buffers{};
-    /// Currently released buffer waiting to be taken by the audio system
-    ::AudioCore::Sink::SinkBuffer released_buffer{};
-    /// The last played (or received) frame of audio, used when the callback underruns
-    std::array<s16, MaxChannels> last_frame{};
 };
 
 CubebSink::CubebSink(std::string_view target_device_name) {
@@ -569,15 +264,15 @@ CubebSink::~CubebSink() {
 #endif
 }
 
-SinkStream* CubebSink::AcquireSinkStream(Core::System& system, const u32 system_channels,
-                                         const std::string& name, const StreamType type) {
+SinkStream* CubebSink::AcquireSinkStream(Core::System& system, u32 system_channels,
+                                         const std::string& name, StreamType type) {
     SinkStreamPtr& stream = sink_streams.emplace_back(std::make_unique<CubebSinkStream>(
         ctx, device_channels, system_channels, output_device, input_device, name, type, system));
 
     return stream.get();
 }
 
-void CubebSink::CloseStream(const SinkStream* stream) {
+void CubebSink::CloseStream(SinkStream* stream) {
     for (size_t i = 0; i < sink_streams.size(); i++) {
         if (sink_streams[i].get() == stream) {
             sink_streams[i].reset();
@@ -611,19 +306,19 @@ f32 CubebSink::GetDeviceVolume() const {
     return sink_streams[0]->GetDeviceVolume();
 }
 
-void CubebSink::SetDeviceVolume(const f32 volume) {
+void CubebSink::SetDeviceVolume(f32 volume) {
     for (auto& stream : sink_streams) {
         stream->SetDeviceVolume(volume);
     }
 }
 
-void CubebSink::SetSystemVolume(const f32 volume) {
+void CubebSink::SetSystemVolume(f32 volume) {
     for (auto& stream : sink_streams) {
         stream->SetSystemVolume(volume);
     }
 }
 
-std::vector<std::string> ListCubebSinkDevices(const bool capture) {
+std::vector<std::string> ListCubebSinkDevices(bool capture) {
     std::vector<std::string> device_list;
     cubeb* ctx;
 
diff --git a/src/audio_core/sink/cubeb_sink.h b/src/audio_core/sink/cubeb_sink.h
index f0f43dfa1..91a6480fa 100644
--- a/src/audio_core/sink/cubeb_sink.h
+++ b/src/audio_core/sink/cubeb_sink.h
@@ -46,7 +46,7 @@ public:
      *
      * @param stream - The stream to close.
      */
-    void CloseStream(const SinkStream* stream) override;
+    void CloseStream(SinkStream* stream) override;
 
     /**
      * Close all streams.
diff --git a/src/audio_core/sink/null_sink.h b/src/audio_core/sink/null_sink.h
index 47a342171..eab9c3a0c 100644
--- a/src/audio_core/sink/null_sink.h
+++ b/src/audio_core/sink/null_sink.h
@@ -3,10 +3,29 @@
 
 #pragma once
 
+#include <string>
+#include <string_view>
+#include <vector>
+
 #include "audio_core/sink/sink.h"
 #include "audio_core/sink/sink_stream.h"
 
+namespace Core {
+class System;
+} // namespace Core
+
 namespace AudioCore::Sink {
+class NullSinkStreamImpl final : public SinkStream {
+public:
+    explicit NullSinkStreamImpl(Core::System& system_, StreamType type_)
+        : SinkStream{system_, type_} {}
+    ~NullSinkStreamImpl() override {}
+    void AppendBuffer(SinkBuffer&, std::vector<s16>&) override {}
+    std::vector<s16> ReleaseBuffer(u64) override {
+        return {};
+    }
+};
+
 /**
  * A no-op sink for when no audio out is wanted.
  */
@@ -15,14 +34,15 @@ public:
     explicit NullSink(std::string_view) {}
     ~NullSink() override = default;
 
-    SinkStream* AcquireSinkStream([[maybe_unused]] Core::System& system,
-                                  [[maybe_unused]] u32 system_channels,
-                                  [[maybe_unused]] const std::string& name,
-                                  [[maybe_unused]] StreamType type) override {
-        return &null_sink_stream;
+    SinkStream* AcquireSinkStream(Core::System& system, u32, const std::string&,
+                                  StreamType type) override {
+        if (null_sink == nullptr) {
+            null_sink = std::make_unique<NullSinkStreamImpl>(system, type);
+        }
+        return null_sink.get();
     }
 
-    void CloseStream([[maybe_unused]] const SinkStream* stream) override {}
+    void CloseStream(SinkStream*) override {}
     void CloseStreams() override {}
     void PauseStreams() override {}
     void UnpauseStreams() override {}
@@ -33,20 +53,7 @@ public:
     void SetSystemVolume(f32 volume) override {}
 
 private:
-    struct NullSinkStreamImpl final : SinkStream {
-        void Finalize() override {}
-        void Start(bool resume = false) override {}
-        void Stop() override {}
-        void AppendBuffer([[maybe_unused]] ::AudioCore::Sink::SinkBuffer& buffer,
-                          [[maybe_unused]] std::vector<s16>& samples) override {}
-        std::vector<s16> ReleaseBuffer([[maybe_unused]] u64 num_samples) override {
-            return {};
-        }
-        bool IsBufferConsumed([[maybe_unused]] const u64 tag) {
-            return true;
-        }
-        void ClearQueue() override {}
-    } null_sink_stream;
+    SinkStreamPtr null_sink{};
 };
 
 } // namespace AudioCore::Sink
diff --git a/src/audio_core/sink/sdl2_sink.cpp b/src/audio_core/sink/sdl2_sink.cpp
index d6c9ec90d..7ee1dd7cd 100644
--- a/src/audio_core/sink/sdl2_sink.cpp
+++ b/src/audio_core/sink/sdl2_sink.cpp
@@ -1,20 +1,13 @@
 // SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later
 
-#include <algorithm>
-#include <atomic>
+#include <span>
+#include <vector>
 
-#include "audio_core/audio_core.h"
-#include "audio_core/audio_event.h"
-#include "audio_core/audio_manager.h"
+#include "audio_core/common/common.h"
 #include "audio_core/sink/sdl2_sink.h"
 #include "audio_core/sink/sink_stream.h"
-#include "common/assert.h"
-#include "common/fixed_point.h"
 #include "common/logging/log.h"
-#include "common/reader_writer_queue.h"
-#include "common/ring_buffer.h"
-#include "common/settings.h"
 #include "core/core.h"
 
 // Ignore -Wimplicit-fallthrough due to https://github.com/libsdl-org/SDL/issues/4307
@@ -44,10 +37,9 @@ public:
      * @param system_          - Core system.
      * @param event            - Event used only for audio renderer, signalled on buffer consume.
      */
-    SDLSinkStream(u32 device_channels_, const u32 system_channels_,
-                  const std::string& output_device, const std::string& input_device,
-                  const StreamType type_, Core::System& system_)
-        : type{type_}, system{system_} {
+    SDLSinkStream(u32 device_channels_, u32 system_channels_, const std::string& output_device,
+                  const std::string& input_device, StreamType type_, Core::System& system_)
+        : SinkStream{system_, type_} {
         system_channels = system_channels_;
         device_channels = device_channels_;
 
@@ -63,8 +55,6 @@ public:
         spec.callback = &SDLSinkStream::DataCallback;
         spec.userdata = this;
 
-        playing_buffer.consumed = true;
-
         std::string device_name{output_device};
         bool capture{false};
         if (type == StreamType::In) {
@@ -84,31 +74,30 @@ public:
             return;
         }
 
-        LOG_DEBUG(Service_Audio,
-                  "Opening sdl stream {} with: rate {} channels {} (system channels {}) "
-                  " samples {}",
-                  device, obtained.freq, obtained.channels, system_channels, obtained.samples);
+        LOG_INFO(Service_Audio,
+                 "Opening SDL stream {} with: rate {} channels {} (system channels {}) "
+                 " samples {}",
+                 device, obtained.freq, obtained.channels, system_channels, obtained.samples);
     }
 
     /**
      * Destroy the sink stream.
      */
     ~SDLSinkStream() override {
-        if (device == 0) {
-            return;
-        }
-
-        SDL_CloseAudioDevice(device);
+        LOG_DEBUG(Service_Audio, "Destructing SDL stream {}", name);
+        Finalize();
     }
 
     /**
      * Finalize the sink stream.
      */
     void Finalize() override {
+        Unstall();
         if (device == 0) {
             return;
         }
 
+        Stop();
         SDL_CloseAudioDevice(device);
     }
 
@@ -118,7 +107,7 @@ public:
      * @param resume - Set to true if this is resuming the stream a previously-active stream.
      *                 Default false.
      */
-    void Start(const bool resume = false) override {
+    void Start(bool resume = false) override {
         if (device == 0) {
             return;
         }
@@ -135,7 +124,8 @@ public:
     /**
      * Stop the sink stream.
      */
-    void Stop() {
+    void Stop() override {
+        Unstall();
         if (device == 0) {
             return;
         }
@@ -143,191 +133,7 @@ public:
         paused = true;
     }
 
-    /**
-     * Append a new buffer and its samples to a waiting queue to play.
-     *
-     * @param buffer  - Audio buffer information to be queued.
-     * @param samples - The s16 samples to be queue for playback.
-     */
-    void AppendBuffer(::AudioCore::Sink::SinkBuffer& buffer, std::vector<s16>& samples) override {
-        if (type == StreamType::In) {
-            queue.enqueue(buffer);
-            queued_buffers++;
-        } else {
-            constexpr s32 min = std::numeric_limits<s16>::min();
-            constexpr s32 max = std::numeric_limits<s16>::max();
-
-            auto yuzu_volume{Settings::Volume()};
-            auto volume{system_volume * device_volume * yuzu_volume};
-
-            if (system_channels == 6 && device_channels == 2) {
-                // We're given 6 channels, but our device only outputs 2, so downmix.
-                constexpr std::array<f32, 4> down_mix_coeff{1.0f, 0.707f, 0.251f, 0.707f};
-
-                for (u32 read_index = 0, write_index = 0; read_index < samples.size();
-                     read_index += system_channels, write_index += device_channels) {
-                    const auto left_sample{
-                        ((Common::FixedPoint<49, 15>(
-                              samples[read_index + static_cast<u32>(Channels::FrontLeft)]) *
-                              down_mix_coeff[0] +
-                          samples[read_index + static_cast<u32>(Channels::Center)] *
-                              down_mix_coeff[1] +
-                          samples[read_index + static_cast<u32>(Channels::LFE)] *
-                              down_mix_coeff[2] +
-                          samples[read_index + static_cast<u32>(Channels::BackLeft)] *
-                              down_mix_coeff[3]) *
-                         volume)
-                            .to_int()};
-
-                    const auto right_sample{
-                        ((Common::FixedPoint<49, 15>(
-                              samples[read_index + static_cast<u32>(Channels::FrontRight)]) *
-                              down_mix_coeff[0] +
-                          samples[read_index + static_cast<u32>(Channels::Center)] *
-                              down_mix_coeff[1] +
-                          samples[read_index + static_cast<u32>(Channels::LFE)] *
-                              down_mix_coeff[2] +
-                          samples[read_index + static_cast<u32>(Channels::BackRight)] *
-                              down_mix_coeff[3]) *
-                         volume)
-                            .to_int()};
-
-                    samples[write_index + static_cast<u32>(Channels::FrontLeft)] =
-                        static_cast<s16>(std::clamp(left_sample, min, max));
-                    samples[write_index + static_cast<u32>(Channels::FrontRight)] =
-                        static_cast<s16>(std::clamp(right_sample, min, max));
-                }
-
-                samples.resize(samples.size() / system_channels * device_channels);
-
-            } else if (system_channels == 2 && device_channels == 6) {
-                // We need moar samples! Not all games will provide 6 channel audio.
-                // TODO: Implement some upmixing here. Currently just passthrough, with other
-                // channels left as silence.
-                std::vector<s16> new_samples(samples.size() / system_channels * device_channels, 0);
-
-                for (u32 read_index = 0, write_index = 0; read_index < samples.size();
-                     read_index += system_channels, write_index += device_channels) {
-                    const auto left_sample{static_cast<s16>(std::clamp(
-                        static_cast<s32>(
-                            static_cast<f32>(
-                                samples[read_index + static_cast<u32>(Channels::FrontLeft)]) *
-                            volume),
-                        min, max))};
-
-                    new_samples[write_index + static_cast<u32>(Channels::FrontLeft)] = left_sample;
-
-                    const auto right_sample{static_cast<s16>(std::clamp(
-                        static_cast<s32>(
-                            static_cast<f32>(
-                                samples[read_index + static_cast<u32>(Channels::FrontRight)]) *
-                            volume),
-                        min, max))};
-
-                    new_samples[write_index + static_cast<u32>(Channels::FrontRight)] =
-                        right_sample;
-                }
-                samples = std::move(new_samples);
-
-            } else if (volume != 1.0f) {
-                for (u32 i = 0; i < samples.size(); i++) {
-                    samples[i] = static_cast<s16>(std::clamp(
-                        static_cast<s32>(static_cast<f32>(samples[i]) * volume), min, max));
-                }
-            }
-
-            samples_buffer.Push(samples);
-            queue.enqueue(buffer);
-            queued_buffers++;
-        }
-    }
-
-    /**
-     * Release a buffer. Audio In only, will fill a buffer with recorded samples.
-     *
-     * @param num_samples - Maximum number of samples to receive.
-     * @return Vector of recorded samples. May have fewer than num_samples.
-     */
-    std::vector<s16> ReleaseBuffer(const u64 num_samples) override {
-        static constexpr s32 min = std::numeric_limits<s16>::min();
-        static constexpr s32 max = std::numeric_limits<s16>::max();
-
-        auto samples{samples_buffer.Pop(num_samples)};
-
-        // TODO: Up-mix to 6 channels if the game expects it.
-        // For audio input this is unlikely to ever be the case though.
-
-        // Incoming mic volume seems to always be very quiet, so multiply by an additional 8 here.
-        // TODO: Play with this and find something that works better.
-        auto volume{system_volume * device_volume * 8};
-        for (u32 i = 0; i < samples.size(); i++) {
-            samples[i] = static_cast<s16>(
-                std::clamp(static_cast<s32>(static_cast<f32>(samples[i]) * volume), min, max));
-        }
-
-        if (samples.size() < num_samples) {
-            samples.resize(num_samples, 0);
-        }
-        return samples;
-    }
-
-    /**
-     * Check if a certain buffer has been consumed (fully played).
-     *
-     * @param tag - Unique tag of a buffer to check for.
-     * @return True if the buffer has been played, otherwise false.
-     */
-    bool IsBufferConsumed(const u64 tag) override {
-        if (released_buffer.tag == 0) {
-            if (!released_buffers.try_dequeue(released_buffer)) {
-                return false;
-            }
-        }
-
-        if (released_buffer.tag == tag) {
-            released_buffer.tag = 0;
-            return true;
-        }
-        return false;
-    }
-
-    /**
-     * Empty out the buffer queue.
-     */
-    void ClearQueue() override {
-        samples_buffer.Pop();
-        while (queue.pop()) {
-        }
-        while (released_buffers.pop()) {
-        }
-        released_buffer = {};
-        playing_buffer = {};
-        playing_buffer.consumed = true;
-        queued_buffers = 0;
-    }
-
 private:
-    /**
-     * Signal events back to the audio system that a buffer was played/can be filled.
-     *
-     * @param buffer - Consumed audio buffer to be released.
-     */
-    void SignalEvent(const ::AudioCore::Sink::SinkBuffer& buffer) {
-        auto& manager{system.AudioCore().GetAudioManager()};
-        switch (type) {
-        case StreamType::Out:
-            released_buffers.enqueue(buffer);
-            manager.SetEvent(Event::Type::AudioOutManager, true);
-            break;
-        case StreamType::In:
-            released_buffers.enqueue(buffer);
-            manager.SetEvent(Event::Type::AudioInManager, true);
-            break;
-        case StreamType::Render:
-            break;
-        }
-    }
-
     /**
      * Main callback from SDL. Either expects samples from us (audio render/audio out), or will
      * provide samples to be copied (audio in).
@@ -345,122 +151,20 @@ private:
 
         const std::size_t num_channels = impl->GetDeviceChannels();
         const std::size_t frame_size = num_channels;
-        const std::size_t frame_size_bytes = frame_size * sizeof(s16);
         const std::size_t num_frames{len / num_channels / sizeof(s16)};
-        size_t frames_written{0};
-        [[maybe_unused]] bool underrun{false};
 
         if (impl->type == StreamType::In) {
-            std::span<s16> input_buffer{reinterpret_cast<s16*>(stream), num_frames * frame_size};
-
-            while (frames_written < num_frames) {
-                auto& playing_buffer{impl->playing_buffer};
-
-                // If the playing buffer has been consumed or has no frames, we need a new one
-                if (playing_buffer.consumed || playing_buffer.frames == 0) {
-                    if (!impl->queue.try_dequeue(impl->playing_buffer)) {
-                        // If no buffer was available we've underrun, just push the samples and
-                        // continue.
-                        underrun = true;
-                        impl->samples_buffer.Push(&input_buffer[frames_written * frame_size],
-                                                  (num_frames - frames_written) * frame_size);
-                        frames_written = num_frames;
-                        continue;
-                    } else {
-                        impl->queued_buffers--;
-                        impl->SignalEvent(impl->playing_buffer);
-                    }
-                }
-
-                // Get the minimum frames available between the currently playing buffer, and the
-                // amount we have left to fill
-                size_t frames_available{
-                    std::min(playing_buffer.frames - playing_buffer.frames_played,
-                             num_frames - frames_written)};
-
-                impl->samples_buffer.Push(&input_buffer[frames_written * frame_size],
-                                          frames_available * frame_size);
-
-                frames_written += frames_available;
-                playing_buffer.frames_played += frames_available;
-
-                // If that's all the frames in the current buffer, add its samples and mark it as
-                // consumed
-                if (playing_buffer.frames_played >= playing_buffer.frames) {
-                    impl->AddPlayedSampleCount(playing_buffer.frames_played * num_channels);
-                    impl->playing_buffer.consumed = true;
-                }
-            }
-
-            std::memcpy(&impl->last_frame[0], &input_buffer[(frames_written - 1) * frame_size],
-                        frame_size_bytes);
+            std::span<const s16> input_buffer{reinterpret_cast<const s16*>(stream),
+                                              num_frames * frame_size};
+            impl->ProcessAudioIn(input_buffer, num_frames);
         } else {
             std::span<s16> output_buffer{reinterpret_cast<s16*>(stream), num_frames * frame_size};
-
-            while (frames_written < num_frames) {
-                auto& playing_buffer{impl->playing_buffer};
-
-                // If the playing buffer has been consumed or has no frames, we need a new one
-                if (playing_buffer.consumed || playing_buffer.frames == 0) {
-                    if (!impl->queue.try_dequeue(impl->playing_buffer)) {
-                        // If no buffer was available we've underrun, fill the remaining buffer with
-                        // the last written frame and continue.
-                        underrun = true;
-                        for (size_t i = frames_written; i < num_frames; i++) {
-                            std::memcpy(&output_buffer[i * frame_size], &impl->last_frame[0],
-                                        frame_size_bytes);
-                        }
-                        frames_written = num_frames;
-                        continue;
-                    } else {
-                        impl->queued_buffers--;
-                        impl->SignalEvent(impl->playing_buffer);
-                    }
-                }
-
-                // Get the minimum frames available between the currently playing buffer, and the
-                // amount we have left to fill
-                size_t frames_available{
-                    std::min(playing_buffer.frames - playing_buffer.frames_played,
-                             num_frames - frames_written)};
-
-                impl->samples_buffer.Pop(&output_buffer[frames_written * frame_size],
-                                         frames_available * frame_size);
-
-                frames_written += frames_available;
-                playing_buffer.frames_played += frames_available;
-
-                // If that's all the frames in the current buffer, add its samples and mark it as
-                // consumed
-                if (playing_buffer.frames_played >= playing_buffer.frames) {
-                    impl->AddPlayedSampleCount(playing_buffer.frames_played * num_channels);
-                    impl->playing_buffer.consumed = true;
-                }
-            }
-
-            std::memcpy(&impl->last_frame[0], &output_buffer[(frames_written - 1) * frame_size],
-                        frame_size_bytes);
+            impl->ProcessAudioOutAndRender(output_buffer, num_frames);
         }
     }
 
     /// SDL device id of the opened input/output device
     SDL_AudioDeviceID device{};
-    /// Type of this stream
-    StreamType type;
-    /// Core system
-    Core::System& system;
-    /// Ring buffer of the samples waiting to be played or consumed
-    Common::RingBuffer<s16, 0x10000> samples_buffer;
-    /// Audio buffers queued and waiting to play
-    Common::ReaderWriterQueue<::AudioCore::Sink::SinkBuffer> queue;
-    /// The currently-playing audio buffer
-    ::AudioCore::Sink::SinkBuffer playing_buffer{};
-    /// Audio buffers which have been played and are in queue to be released by the audio system
-    Common::ReaderWriterQueue<::AudioCore::Sink::SinkBuffer> released_buffers{};
-    /// Currently released buffer waiting to be taken by the audio system
-    ::AudioCore::Sink::SinkBuffer released_buffer{};
-    /// The last played (or received) frame of audio, used when the callback underruns
-    std::array<s16, MaxChannels> last_frame{};
 };
 
 SDLSink::SDLSink(std::string_view target_device_name) {
@@ -482,14 +186,14 @@ SDLSink::SDLSink(std::string_view target_device_name) {
 
 SDLSink::~SDLSink() = default;
 
-SinkStream* SDLSink::AcquireSinkStream(Core::System& system, const u32 system_channels,
-                                       const std::string&, const StreamType type) {
+SinkStream* SDLSink::AcquireSinkStream(Core::System& system, u32 system_channels,
+                                       const std::string&, StreamType type) {
     SinkStreamPtr& stream = sink_streams.emplace_back(std::make_unique<SDLSinkStream>(
         device_channels, system_channels, output_device, input_device, type, system));
     return stream.get();
 }
 
-void SDLSink::CloseStream(const SinkStream* stream) {
+void SDLSink::CloseStream(SinkStream* stream) {
     for (size_t i = 0; i < sink_streams.size(); i++) {
         if (sink_streams[i].get() == stream) {
             sink_streams[i].reset();
@@ -523,19 +227,19 @@ f32 SDLSink::GetDeviceVolume() const {
     return sink_streams[0]->GetDeviceVolume();
 }
 
-void SDLSink::SetDeviceVolume(const f32 volume) {
+void SDLSink::SetDeviceVolume(f32 volume) {
     for (auto& stream : sink_streams) {
         stream->SetDeviceVolume(volume);
     }
 }
 
-void SDLSink::SetSystemVolume(const f32 volume) {
+void SDLSink::SetSystemVolume(f32 volume) {
     for (auto& stream : sink_streams) {
         stream->SetSystemVolume(volume);
     }
 }
 
-std::vector<std::string> ListSDLSinkDevices(const bool capture) {
+std::vector<std::string> ListSDLSinkDevices(bool capture) {
     std::vector<std::string> device_list;
 
     if (!SDL_WasInit(SDL_INIT_AUDIO)) {
diff --git a/src/audio_core/sink/sdl2_sink.h b/src/audio_core/sink/sdl2_sink.h
index 186bc2fa3..57de9b6c2 100644
--- a/src/audio_core/sink/sdl2_sink.h
+++ b/src/audio_core/sink/sdl2_sink.h
@@ -44,7 +44,7 @@ public:
      *
      * @param stream - The stream to close.
      */
-    void CloseStream(const SinkStream* stream) override;
+    void CloseStream(SinkStream* stream) override;
 
     /**
      * Close all streams.
diff --git a/src/audio_core/sink/sink.h b/src/audio_core/sink/sink.h
index 91fe455e4..43d99b62e 100644
--- a/src/audio_core/sink/sink.h
+++ b/src/audio_core/sink/sink.h
@@ -32,7 +32,7 @@ public:
      *
      * @param stream - The stream to close.
      */
-    virtual void CloseStream(const SinkStream* stream) = 0;
+    virtual void CloseStream(SinkStream* stream) = 0;
 
     /**
      * Close all streams.
diff --git a/src/audio_core/sink/sink_details.cpp b/src/audio_core/sink/sink_details.cpp
index 253c0fd1e..67bdab779 100644
--- a/src/audio_core/sink/sink_details.cpp
+++ b/src/audio_core/sink/sink_details.cpp
@@ -5,7 +5,7 @@
 #include <memory>
 #include <string>
 #include <vector>
-#include "audio_core/sink/null_sink.h"
+
 #include "audio_core/sink/sink_details.h"
 #ifdef HAVE_CUBEB
 #include "audio_core/sink/cubeb_sink.h"
@@ -13,6 +13,7 @@
 #ifdef HAVE_SDL2
 #include "audio_core/sink/sdl2_sink.h"
 #endif
+#include "audio_core/sink/null_sink.h"
 #include "common/logging/log.h"
 
 namespace AudioCore::Sink {
@@ -59,8 +60,7 @@ const SinkDetails& GetOutputSinkDetails(std::string_view sink_id) {
 
     if (sink_id == "auto" || iter == std::end(sink_details)) {
         if (sink_id != "auto") {
-            LOG_ERROR(Audio, "AudioCore::Sink::GetOutputSinkDetails given invalid sink_id {}",
-                      sink_id);
+            LOG_ERROR(Audio, "Invalid sink_id {}", sink_id);
         }
         // Auto-select.
         // sink_details is ordered in terms of desirability, with the best choice at the front.
diff --git a/src/audio_core/sink/sink_stream.cpp b/src/audio_core/sink/sink_stream.cpp
new file mode 100644
index 000000000..3770c515d
--- /dev/null
+++ b/src/audio_core/sink/sink_stream.cpp
@@ -0,0 +1,259 @@
+// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#pragma once
+
+#include <array>
+#include <atomic>
+#include <memory>
+#include <span>
+#include <vector>
+
+#include "audio_core/common/common.h"
+#include "audio_core/sink/sink_stream.h"
+#include "common/common_types.h"
+#include "common/fixed_point.h"
+#include "common/settings.h"
+#include "core/core.h"
+
+namespace AudioCore::Sink {
+
+void SinkStream::AppendBuffer(SinkBuffer& buffer, std::vector<s16>& samples) {
+    if (type == StreamType::In) { // Audio in: queue the buffer only, no samples.
+        queue.enqueue(buffer);
+        queued_buffers++;
+        return;
+    }
+
+    constexpr s32 min{std::numeric_limits<s16>::min()};
+    constexpr s32 max{std::numeric_limits<s16>::max()};
+
+    auto yuzu_volume{Settings::Volume()};
+    if (yuzu_volume > 1.0f) { // NOTE(review): result is ~0.6 just above 1.0 - confirm intended.
+        yuzu_volume = 0.6f + 20 * std::log10(yuzu_volume);
+    }
+    auto volume{system_volume * device_volume * yuzu_volume};
+
+    if (system_channels == 6 && device_channels == 2) {
+        // We're given 6 channels, but our device only outputs 2, so downmix in place.
+        constexpr std::array<f32, 4> down_mix_coeff{1.0f, 0.707f, 0.251f, 0.707f};
+
+        for (u32 read_index = 0, write_index = 0; read_index < samples.size();
+             read_index += system_channels, write_index += device_channels) {
+            const auto left_sample{
+                ((Common::FixedPoint<49, 15>(
+                      samples[read_index + static_cast<u32>(Channels::FrontLeft)]) *
+                      down_mix_coeff[0] +
+                  samples[read_index + static_cast<u32>(Channels::Center)] * down_mix_coeff[1] +
+                  samples[read_index + static_cast<u32>(Channels::LFE)] * down_mix_coeff[2] +
+                  samples[read_index + static_cast<u32>(Channels::BackLeft)] * down_mix_coeff[3]) *
+                 volume)
+                    .to_int()};
+
+            const auto right_sample{
+                ((Common::FixedPoint<49, 15>(
+                      samples[read_index + static_cast<u32>(Channels::FrontRight)]) *
+                      down_mix_coeff[0] +
+                  samples[read_index + static_cast<u32>(Channels::Center)] * down_mix_coeff[1] +
+                  samples[read_index + static_cast<u32>(Channels::LFE)] * down_mix_coeff[2] +
+                  samples[read_index + static_cast<u32>(Channels::BackRight)] * down_mix_coeff[3]) *
+                 volume)
+                    .to_int()};
+
+            samples[write_index + static_cast<u32>(Channels::FrontLeft)] =
+                static_cast<s16>(std::clamp(left_sample, min, max));
+            samples[write_index + static_cast<u32>(Channels::FrontRight)] =
+                static_cast<s16>(std::clamp(right_sample, min, max));
+        }
+
+        samples.resize(samples.size() / system_channels * device_channels); // Trim unused tail.
+
+    } else if (system_channels == 2 && device_channels == 6) {
+        // The device takes 6 channels, but not all games will provide 6 channel audio.
+        // TODO: Implement some upmixing here. Currently just passthrough, with other
+        // channels left as silence.
+        std::vector<s16> new_samples(samples.size() / system_channels * device_channels, 0);
+
+        for (u32 read_index = 0, write_index = 0; read_index < samples.size();
+             read_index += system_channels, write_index += device_channels) {
+            const auto left_sample{static_cast<s16>(std::clamp(
+                static_cast<s32>(
+                    static_cast<f32>(samples[read_index + static_cast<u32>(Channels::FrontLeft)]) *
+                    volume),
+                min, max))};
+
+            new_samples[write_index + static_cast<u32>(Channels::FrontLeft)] = left_sample;
+
+            const auto right_sample{static_cast<s16>(std::clamp(
+                static_cast<s32>(
+                    static_cast<f32>(samples[read_index + static_cast<u32>(Channels::FrontRight)]) *
+                    volume),
+                min, max))};
+
+            new_samples[write_index + static_cast<u32>(Channels::FrontRight)] = right_sample;
+        }
+        samples = std::move(new_samples);
+
+    } else if (volume != 1.0f) { // No channel conversion: only scale by volume.
+        for (u32 i = 0; i < samples.size(); i++) {
+            samples[i] = static_cast<s16>(
+                std::clamp(static_cast<s32>(static_cast<f32>(samples[i]) * volume), min, max));
+        }
+    }
+
+    samples_buffer.Push(samples);
+    queue.enqueue(buffer);
+    queued_buffers++;
+}
+
+std::vector<s16> SinkStream::ReleaseBuffer(u64 num_samples) {
+    constexpr s32 lowest{std::numeric_limits<s16>::min()};
+    constexpr s32 highest{std::numeric_limits<s16>::max()};
+
+    // TODO: Up-mix to 6 channels if the game expects it.
+    // For audio input this is unlikely to ever be the case though.
+    auto recorded{samples_buffer.Pop(num_samples)};
+
+    // Incoming mic volume seems to always be very quiet, so multiply by an additional 8 here.
+    // TODO: Play with this and find something that works better.
+    const auto gain{system_volume * device_volume * 8};
+    for (auto& sample : recorded) {
+        const auto scaled{static_cast<s32>(static_cast<f32>(sample) * gain)};
+        sample = static_cast<s16>(std::clamp(scaled, lowest, highest));
+    }
+
+    // Pad with zeros when fewer samples than requested came out of the ring buffer.
+    if (recorded.size() < num_samples) {
+        recorded.resize(num_samples, 0);
+    }
+    return recorded;
+}
+
+void SinkStream::ClearQueue() {
+    samples_buffer.Pop(); // Presumably pops (discards) all pending samples - confirm.
+    while (queue.pop()) { // Drain every queued buffer.
+    }
+    queued_buffers = 0;
+    playing_buffer = {};
+    playing_buffer.consumed = true; // Makes the next callback dequeue a fresh buffer.
+}
+
+void SinkStream::ProcessAudioIn(std::span<const s16> input_buffer, std::size_t num_frames) {
+    const std::size_t num_channels = GetDeviceChannels();
+    const std::size_t frame_size = num_channels;
+    const std::size_t frame_size_bytes = frame_size * sizeof(s16);
+    size_t frames_written{0};
+
+    if (queued_buffers > max_queue_size) {
+        Stall();
+    }
+
+    while (frames_written < num_frames) {
+        // If the playing buffer has been consumed or has no frames, we need a new one
+        if (playing_buffer.consumed || playing_buffer.frames == 0) {
+            if (!queue.try_dequeue(playing_buffer)) {
+                // If no buffer was available we've underrun, just push the samples and continue.
+                samples_buffer.Push(&input_buffer[frames_written * frame_size],
+                                    (num_frames - frames_written) * frame_size);
+                frames_written = num_frames;
+                continue;
+            }
+            // Successfully dequeued a new buffer.
+            queued_buffers--;
+        }
+
+        // Take the smaller of what the playing buffer still needs and what is left to write
+        size_t frames_available{std::min(playing_buffer.frames - playing_buffer.frames_played,
+                                         num_frames - frames_written)};
+
+        samples_buffer.Push(&input_buffer[frames_written * frame_size],
+                            frames_available * frame_size);
+
+        frames_written += frames_available;
+        playing_buffer.frames_played += frames_available;
+
+        // Once every frame of the current buffer has been handled, mark it as consumed
+        if (playing_buffer.frames_played >= playing_buffer.frames) {
+            playing_buffer.consumed = true;
+        }
+    }
+
+    if (frames_written > 0) { // No frames given: (frames_written - 1) would wrap around.
+        std::memcpy(&last_frame[0], &input_buffer[(frames_written - 1) * frame_size],
+                    frame_size_bytes);
+    }
+
+    if (queued_buffers <= max_queue_size) {
+        Unstall();
+    }
+}
+
+void SinkStream::ProcessAudioOutAndRender(std::span<s16> output_buffer, std::size_t num_frames) {
+    const std::size_t num_channels = GetDeviceChannels();
+    const std::size_t frame_size = num_channels;
+    const std::size_t frame_size_bytes = frame_size * sizeof(s16);
+    size_t frames_written{0};
+
+    if (queued_buffers > max_queue_size) {
+        Stall();
+    }
+
+    while (frames_written < num_frames) {
+        // If the playing buffer has been consumed or has no frames, we need a new one
+        if (playing_buffer.consumed || playing_buffer.frames == 0) {
+            if (!queue.try_dequeue(playing_buffer)) {
+                // If no buffer was available we've underrun, fill the remaining buffer with
+                // the last written frame and continue.
+                for (size_t i = frames_written; i < num_frames; i++) {
+                    std::memcpy(&output_buffer[i * frame_size], &last_frame[0], frame_size_bytes);
+                }
+                frames_written = num_frames;
+                continue;
+            }
+            // Successfully dequeued a new buffer.
+            queued_buffers--;
+        }
+
+        // Take the smaller of what the playing buffer still holds and what is left to fill
+        size_t frames_available{std::min(playing_buffer.frames - playing_buffer.frames_played,
+                                         num_frames - frames_written)};
+
+        samples_buffer.Pop(&output_buffer[frames_written * frame_size],
+                           frames_available * frame_size);
+
+        frames_written += frames_available;
+        playing_buffer.frames_played += frames_available;
+
+        // Once every frame of the current buffer has been played, mark it as consumed
+        if (playing_buffer.frames_played >= playing_buffer.frames) {
+            playing_buffer.consumed = true;
+        }
+    }
+
+    if (frames_written > 0) { // No frames requested: (frames_written - 1) would wrap around.
+        std::memcpy(&last_frame[0], &output_buffer[(frames_written - 1) * frame_size],
+                    frame_size_bytes);
+    }
+
+    if (stalled && queued_buffers <= max_queue_size) {
+        Unstall();
+    }
+}
+
+void SinkStream::Stall() {
+    // Stalling is a one-shot transition; calls made while already stalled do nothing.
+    if (!stalled) {
+        stalled = true;
+        system.StallProcesses();
+    }
+}
+
+void SinkStream::Unstall() {
+    // Does nothing unless this stream is currently stalled.
+    if (stalled) {
+        system.UnstallProcesses();
+        stalled = false;
+    }
+}
+
+} // namespace AudioCore::Sink
diff --git a/src/audio_core/sink/sink_stream.h b/src/audio_core/sink/sink_stream.h
index 17ed6593f..db7cff45e 100644
--- a/src/audio_core/sink/sink_stream.h
+++ b/src/audio_core/sink/sink_stream.h
@@ -3,12 +3,20 @@
 
 #pragma once
 
+#include <array>
 #include <atomic>
 #include <memory>
+#include <span>
 #include <vector>
 
 #include "audio_core/common/common.h"
 #include "common/common_types.h"
+#include "common/reader_writer_queue.h"
+#include "common/ring_buffer.h"
+
+namespace Core {
+class System;
+} // namespace Core
 
 namespace AudioCore::Sink {
 
@@ -34,20 +42,24 @@ struct SinkBuffer {
  * You should regularly call IsBufferConsumed with the unique SinkBuffer tag to check if the buffer
  * has been consumed.
  *
- * Since these are a FIFO queue, always check IsBufferConsumed in the same order you appended the
- * buffers, skipping a buffer will result in all following buffers to never release.
+ * Since these are a FIFO queue, IsBufferConsumed must be checked in the same order buffers were
+ * appended; skipping a buffer will result in the queue getting stuck, and all following buffers
+ * will never be released.
  *
  * If the buffers appear to be stuck, you can stop and re-open an IAudioIn/IAudioOut service (this
  * is what games do), or call ClearQueue to flush all of the buffers without a full restart.
  */
 class SinkStream {
 public:
-    virtual ~SinkStream() = default;
+    explicit SinkStream(Core::System& system_, StreamType type_) : system{system_}, type{type_} {}
+    virtual ~SinkStream() {
+        Unstall();
+    }
 
     /**
      * Finalize the sink stream.
      */
-    virtual void Finalize() = 0;
+    virtual void Finalize() {}
 
     /**
      * Start the sink stream.
@@ -55,48 +67,19 @@ public:
      * @param resume - Set to true if this is resuming the stream a previously-active stream.
      *                 Default false.
      */
-    virtual void Start(bool resume = false) = 0;
+    virtual void Start(bool resume = false) {}
 
     /**
      * Stop the sink stream.
      */
-    virtual void Stop() = 0;
-
-    /**
-     * Append a new buffer and its samples to a waiting queue to play.
-     *
-     * @param buffer  - Audio buffer information to be queued.
-     * @param samples - The s16 samples to be queue for playback.
-     */
-    virtual void AppendBuffer(SinkBuffer& buffer, std::vector<s16>& samples) = 0;
-
-    /**
-     * Release a buffer. Audio In only, will fill a buffer with recorded samples.
-     *
-     * @param num_samples - Maximum number of samples to receive.
-     * @return Vector of recorded samples. May have fewer than num_samples.
-     */
-    virtual std::vector<s16> ReleaseBuffer(u64 num_samples) = 0;
-
-    /**
-     * Check if a certain buffer has been consumed (fully played).
-     *
-     * @param tag - Unique tag of a buffer to check for.
-     * @return True if the buffer has been played, otherwise false.
-     */
-    virtual bool IsBufferConsumed(u64 tag) = 0;
-
-    /**
-     * Empty out the buffer queue.
-     */
-    virtual void ClearQueue() = 0;
+    virtual void Stop() {}
 
     /**
      * Check if the stream is paused.
      *
      * @return True if paused, otherwise false.
      */
-    bool IsPaused() {
+    bool IsPaused() const {
         return paused;
     }
 
@@ -127,34 +110,6 @@ public:
         return device_channels;
     }
 
-    /**
-     * Get the total number of samples played by this stream.
-     *
-     * @return Number of samples played.
-     */
-    u64 GetPlayedSampleCount() const {
-        return played_sample_count;
-    }
-
-    /**
-     * Set the number of samples played.
-     * This is started and stopped on system start/stop.
-     *
-     * @param played_sample_count_ - Number of samples to set.
-     */
-    void SetPlayedSampleCount(u64 played_sample_count_) {
-        played_sample_count = played_sample_count_;
-    }
-
-    /**
-     * Add to the played sample count.
-     *
-     * @param num_samples - Number of samples to add.
-     */
-    void AddPlayedSampleCount(u64 num_samples) {
-        played_sample_count += num_samples;
-    }
-
     /**
      * Get the system volume.
      *
@@ -200,15 +155,65 @@ public:
         return queued_buffers.load();
     }
 
+    /**
+     * Set the maximum buffer queue size.
+     */
+    void SetRingSize(u32 ring_size) {
+        max_queue_size = ring_size;
+    }
+
+    /**
+     * Append a new buffer and its samples to a waiting queue to play.
+     *
+     * @param buffer  - Audio buffer information to be queued.
+     * @param samples - The s16 samples to be queue for playback.
+     */
+    virtual void AppendBuffer(SinkBuffer& buffer, std::vector<s16>& samples);
+
+    /**
+     * Release a buffer. Audio In only, will fill a buffer with recorded samples.
+     *
+     * @param num_samples - Maximum number of samples to receive.
+     * @return Vector of recorded samples. The base implementation zero-pads it up to num_samples.
+     */
+    virtual std::vector<s16> ReleaseBuffer(u64 num_samples);
+
+    /**
+     * Empty out the buffer queue.
+     */
+    void ClearQueue();
+
+    /**
+     * Callback for AudioIn.
+     *
+     * @param input_buffer - Buffer of incoming samples to read from; it is never written to.
+     * @param num_frames - Number of frames to read from input_buffer.
+     */
+    void ProcessAudioIn(std::span<const s16> input_buffer, std::size_t num_frames);
+
+    /**
+     * Callback for AudioOut and AudioRenderer.
+     *
+     * @param output_buffer - Output buffer to be filled with samples.
+     * @param num_frames - Number of frames to be filled.
+     */
+    void ProcessAudioOutAndRender(std::span<s16> output_buffer, std::size_t num_frames);
+
+    /**
+     * Stall core processes if the audio thread falls too far behind.
+     */
+    void Stall();
+
+    /**
+     * Unstall core processes.
+     */
+    void Unstall();
+
 protected:
-    /// Number of buffers waiting to be played
-    std::atomic<u32> queued_buffers{};
-    /// Total samples played by this stream
-    std::atomic<u64> played_sample_count{};
-    /// Set by the audio render/in/out system which uses this stream
-    f32 system_volume{1.0f};
-    /// Set via IAudioDevice service calls
-    f32 device_volume{1.0f};
+    /// Core system
+    Core::System& system;
+    /// Type of this stream
+    StreamType type;
     /// Set by the audio render/in/out systen which uses this stream
     u32 system_channels{2};
     /// Channels supported by hardware
@@ -217,6 +222,28 @@ protected:
     std::atomic<bool> paused{true};
     /// Was this stream previously playing?
     std::atomic<bool> was_playing{false};
+    /// Name of this stream
+    std::string name{};
+
+private:
+    /// Ring buffer of the samples waiting to be played or consumed
+    Common::RingBuffer<s16, 0x10000> samples_buffer;
+    /// Audio buffers queued and waiting to play
+    Common::ReaderWriterQueue<SinkBuffer> queue;
+    /// The currently-playing audio buffer
+    SinkBuffer playing_buffer{};
+    /// The last played (or received) frame of audio, used when the callback underruns
+    std::array<s16, MaxChannels> last_frame{};
+    /// Number of buffers waiting to be played
+    std::atomic<u32> queued_buffers{};
+    /// The ring size for audio out buffers (usually 4, rarely 2 or 8)
+    u32 max_queue_size{};
+    /// Set by the audio render/in/out system which uses this stream
+    f32 system_volume{1.0f};
+    /// Set via IAudioDevice service calls
+    f32 device_volume{1.0f};
+    /// True if coretiming has been stalled
+    bool stalled{false};
 };
 
 using SinkStreamPtr = std::unique_ptr<SinkStream>;
diff --git a/src/core/hle/result.h b/src/core/hle/result.h
index 4de44cd06..47a1b829b 100644
--- a/src/core/hle/result.h
+++ b/src/core/hle/result.h
@@ -117,6 +117,7 @@ union Result {
     BitField<0, 9, ErrorModule> module;
     BitField<9, 13, u32> description;
 
+    Result() = default;
     constexpr explicit Result(u32 raw_) : raw(raw_) {}
 
     constexpr Result(ErrorModule module_, u32 description_)
@@ -130,6 +131,7 @@ union Result {
         return !IsSuccess();
     }
 };
+static_assert(std::is_trivial_v<Result>);
 
 [[nodiscard]] constexpr bool operator==(const Result& a, const Result& b) {
     return a.raw == b.raw;

From 2129d040a509754839b82b1ff6d387cb4f84f168 Mon Sep 17 00:00:00 2001
From: Kelebek1 <eeeedddccc@hotmail.co.uk>
Date: Sun, 4 Sep 2022 05:41:06 +0100
Subject: [PATCH 2/2] Don't stall with nvdec

---
 src/audio_core/audio_core.cpp                      |  8 ++++++++
 src/audio_core/audio_core.h                        | 14 ++++++++++++++
 src/audio_core/sink/sink_stream.cpp                |  8 +++++++-
 .../hle/service/nvdrv/devices/nvhost_nvdec.cpp     |  7 ++++++-
 4 files changed, 35 insertions(+), 2 deletions(-)

diff --git a/src/audio_core/audio_core.cpp b/src/audio_core/audio_core.cpp
index cf7e763e6..9feec1829 100644
--- a/src/audio_core/audio_core.cpp
+++ b/src/audio_core/audio_core.cpp
@@ -57,4 +57,12 @@ void AudioCore::PauseSinks(const bool pausing) const {
     }
 }
 
+void AudioCore::SetNVDECActive(bool active) {
+    nvdec_active = active; // Plain bool member; no synchronisation is done here.
+}
+
+bool AudioCore::IsNVDECActive() const {
+    return nvdec_active; // Checked by SinkStream::ProcessAudioOutAndRender before stalling.
+}
+
 } // namespace AudioCore
diff --git a/src/audio_core/audio_core.h b/src/audio_core/audio_core.h
index fd1e43356..ac9afefaa 100644
--- a/src/audio_core/audio_core.h
+++ b/src/audio_core/audio_core.h
@@ -65,6 +65,18 @@ public:
      */
     void PauseSinks(bool pausing) const;
 
+    /**
+     * Toggle NVDEC state, used to avoid stall in playback.
+     *
+     * @param active - Set true if nvdec is active, otherwise false.
+     */
+    void SetNVDECActive(bool active);
+
+    /**
+     * Get NVDEC state.
+     */
+    bool IsNVDECActive() const;
+
 private:
     /**
      * Create the sinks on startup.
@@ -79,6 +91,8 @@ private:
     std::unique_ptr<Sink::Sink> input_sink;
     /// The ADSP in the sysmodule
     std::unique_ptr<AudioRenderer::ADSP::ADSP> adsp;
+    /// Is NVDec currently active?
+    bool nvdec_active{false};
 };
 
 } // namespace AudioCore
diff --git a/src/audio_core/sink/sink_stream.cpp b/src/audio_core/sink/sink_stream.cpp
index 3770c515d..24636e512 100644
--- a/src/audio_core/sink/sink_stream.cpp
+++ b/src/audio_core/sink/sink_stream.cpp
@@ -9,6 +9,7 @@
 #include <span>
 #include <vector>
 
+#include "audio_core/audio_core.h"
 #include "audio_core/common/common.h"
 #include "audio_core/sink/sink_stream.h"
 #include "common/common_types.h"
@@ -194,7 +195,12 @@ void SinkStream::ProcessAudioOutAndRender(std::span<s16> output_buffer, std::siz
     const std::size_t frame_size_bytes = frame_size * sizeof(s16);
     size_t frames_written{0};
 
-    if (queued_buffers > max_queue_size) {
+    // Due to many frames being queued up with nvdec (5 frames or so?), a lot of buffers also get
+    // queued up (30+) but not all at once, which causes constant stalling here, so just let the
+    // video play out without attempting to stall.
+    // Can hopefully remove this later with a more complete NVDEC implementation.
+    const auto nvdec_active{system.AudioCore().IsNVDECActive()};
+    if (!nvdec_active && queued_buffers > max_queue_size) {
         Stall();
     }
 
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp
index 2a5128c60..a7385fce8 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec.cpp
@@ -1,6 +1,7 @@
 // SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later
 
+#include "audio_core/audio_core.h"
 #include "common/assert.h"
 #include "common/logging/log.h"
 #include "core/core.h"
@@ -65,7 +66,10 @@ NvResult nvhost_nvdec::Ioctl3(DeviceFD fd, Ioctl command, const std::vector<u8>&
     return NvResult::NotImplemented;
 }
 
-void nvhost_nvdec::OnOpen(DeviceFD fd) {}
+void nvhost_nvdec::OnOpen(DeviceFD fd) {
+    LOG_INFO(Service_NVDRV, "NVDEC video stream started");
+    system.AudioCore().SetNVDECActive(true); // Set back to false in OnClose.
+}
 
 void nvhost_nvdec::OnClose(DeviceFD fd) {
     LOG_INFO(Service_NVDRV, "NVDEC video stream ended");
@@ -73,6 +77,7 @@ void nvhost_nvdec::OnClose(DeviceFD fd) {
     if (iter != fd_to_id.end()) {
         system.GPU().ClearCdmaInstance(iter->second);
     }
+    system.AudioCore().SetNVDECActive(false);
 }
 
 } // namespace Service::Nvidia::Devices