Merge pull request #3964 from ReinUsesLisp/arb-integration
renderer_opengl: Add assembly program code paths
This commit is contained in:
commit
325e7eed3c
|
@ -112,6 +112,7 @@ void LogSettings() {
|
||||||
LogSetting("Renderer_UseAsynchronousGpuEmulation",
|
LogSetting("Renderer_UseAsynchronousGpuEmulation",
|
||||||
Settings::values.use_asynchronous_gpu_emulation);
|
Settings::values.use_asynchronous_gpu_emulation);
|
||||||
LogSetting("Renderer_UseVsync", Settings::values.use_vsync);
|
LogSetting("Renderer_UseVsync", Settings::values.use_vsync);
|
||||||
|
LogSetting("Renderer_UseAssemblyShaders", Settings::values.use_assembly_shaders);
|
||||||
LogSetting("Renderer_AnisotropicFilteringLevel", Settings::values.max_anisotropy);
|
LogSetting("Renderer_AnisotropicFilteringLevel", Settings::values.max_anisotropy);
|
||||||
LogSetting("Audio_OutputEngine", Settings::values.sink_id);
|
LogSetting("Audio_OutputEngine", Settings::values.sink_id);
|
||||||
LogSetting("Audio_EnableAudioStretching", Settings::values.enable_audio_stretching);
|
LogSetting("Audio_EnableAudioStretching", Settings::values.enable_audio_stretching);
|
||||||
|
|
|
@ -446,6 +446,7 @@ struct Values {
|
||||||
GPUAccuracy gpu_accuracy;
|
GPUAccuracy gpu_accuracy;
|
||||||
bool use_asynchronous_gpu_emulation;
|
bool use_asynchronous_gpu_emulation;
|
||||||
bool use_vsync;
|
bool use_vsync;
|
||||||
|
bool use_assembly_shaders;
|
||||||
bool force_30fps_mode;
|
bool force_30fps_mode;
|
||||||
bool use_fast_gpu_time;
|
bool use_fast_gpu_time;
|
||||||
|
|
||||||
|
|
|
@ -201,6 +201,7 @@ void TelemetrySession::AddInitialInfo(Loader::AppLoader& app_loader) {
|
||||||
AddField(field_type, "Renderer_UseAsynchronousGpuEmulation",
|
AddField(field_type, "Renderer_UseAsynchronousGpuEmulation",
|
||||||
Settings::values.use_asynchronous_gpu_emulation);
|
Settings::values.use_asynchronous_gpu_emulation);
|
||||||
AddField(field_type, "Renderer_UseVsync", Settings::values.use_vsync);
|
AddField(field_type, "Renderer_UseVsync", Settings::values.use_vsync);
|
||||||
|
AddField(field_type, "Renderer_UseAssemblyShaders", Settings::values.use_assembly_shaders);
|
||||||
AddField(field_type, "System_UseDockedMode", Settings::values.use_docked_mode);
|
AddField(field_type, "System_UseDockedMode", Settings::values.use_docked_mode);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -13,6 +13,7 @@
|
||||||
|
|
||||||
#include "common/logging/log.h"
|
#include "common/logging/log.h"
|
||||||
#include "common/scope_exit.h"
|
#include "common/scope_exit.h"
|
||||||
|
#include "core/settings.h"
|
||||||
#include "video_core/renderer_opengl/gl_device.h"
|
#include "video_core/renderer_opengl/gl_device.h"
|
||||||
#include "video_core/renderer_opengl/gl_resource_manager.h"
|
#include "video_core/renderer_opengl/gl_resource_manager.h"
|
||||||
|
|
||||||
|
@ -183,10 +184,16 @@ Device::Device() : base_bindings{BuildBaseBindings()} {
|
||||||
has_precise_bug = TestPreciseBug();
|
has_precise_bug = TestPreciseBug();
|
||||||
has_broken_compute = is_intel_proprietary;
|
has_broken_compute = is_intel_proprietary;
|
||||||
has_fast_buffer_sub_data = is_nvidia;
|
has_fast_buffer_sub_data = is_nvidia;
|
||||||
|
use_assembly_shaders = Settings::values.use_assembly_shaders && GLAD_GL_NV_gpu_program5 &&
|
||||||
|
GLAD_GL_NV_compute_program5;
|
||||||
|
|
||||||
LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", has_variable_aoffi);
|
LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", has_variable_aoffi);
|
||||||
LOG_INFO(Render_OpenGL, "Renderer_ComponentIndexingBug: {}", has_component_indexing_bug);
|
LOG_INFO(Render_OpenGL, "Renderer_ComponentIndexingBug: {}", has_component_indexing_bug);
|
||||||
LOG_INFO(Render_OpenGL, "Renderer_PreciseBug: {}", has_precise_bug);
|
LOG_INFO(Render_OpenGL, "Renderer_PreciseBug: {}", has_precise_bug);
|
||||||
|
|
||||||
|
if (Settings::values.use_assembly_shaders && !use_assembly_shaders) {
|
||||||
|
LOG_ERROR(Render_OpenGL, "Assembly shaders enabled but not supported");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Device::Device(std::nullptr_t) {
|
Device::Device(std::nullptr_t) {
|
||||||
|
|
|
@ -88,6 +88,10 @@ public:
|
||||||
return has_fast_buffer_sub_data;
|
return has_fast_buffer_sub_data;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool UseAssemblyShaders() const {
|
||||||
|
return use_assembly_shaders;
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
static bool TestVariableAoffi();
|
static bool TestVariableAoffi();
|
||||||
static bool TestPreciseBug();
|
static bool TestPreciseBug();
|
||||||
|
@ -107,6 +111,7 @@ private:
|
||||||
bool has_precise_bug{};
|
bool has_precise_bug{};
|
||||||
bool has_broken_compute{};
|
bool has_broken_compute{};
|
||||||
bool has_fast_buffer_sub_data{};
|
bool has_fast_buffer_sub_data{};
|
||||||
|
bool use_assembly_shaders{};
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace OpenGL
|
} // namespace OpenGL
|
||||||
|
|
|
@ -94,17 +94,30 @@ void oglEnable(GLenum cap, bool state) {
|
||||||
} // Anonymous namespace
|
} // Anonymous namespace
|
||||||
|
|
||||||
RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window,
|
RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window,
|
||||||
ScreenInfo& info, GLShader::ProgramManager& program_manager,
|
const Device& device, ScreenInfo& info,
|
||||||
StateTracker& state_tracker)
|
ProgramManager& program_manager, StateTracker& state_tracker)
|
||||||
: RasterizerAccelerated{system.Memory()}, texture_cache{system, *this, device, state_tracker},
|
: RasterizerAccelerated{system.Memory()}, device{device}, texture_cache{system, *this, device,
|
||||||
|
state_tracker},
|
||||||
shader_cache{*this, system, emu_window, device}, query_cache{system, *this},
|
shader_cache{*this, system, emu_window, device}, query_cache{system, *this},
|
||||||
buffer_cache{*this, system, device, STREAM_BUFFER_SIZE},
|
buffer_cache{*this, system, device, STREAM_BUFFER_SIZE},
|
||||||
fence_manager{system, *this, texture_cache, buffer_cache, query_cache}, system{system},
|
fence_manager{system, *this, texture_cache, buffer_cache, query_cache}, system{system},
|
||||||
screen_info{info}, program_manager{program_manager}, state_tracker{state_tracker} {
|
screen_info{info}, program_manager{program_manager}, state_tracker{state_tracker} {
|
||||||
CheckExtensions();
|
CheckExtensions();
|
||||||
|
|
||||||
|
if (device.UseAssemblyShaders()) {
|
||||||
|
glCreateBuffers(static_cast<GLsizei>(staging_cbufs.size()), staging_cbufs.data());
|
||||||
|
for (const GLuint cbuf : staging_cbufs) {
|
||||||
|
glNamedBufferStorage(cbuf, static_cast<GLsizeiptr>(Maxwell::MaxConstBufferSize),
|
||||||
|
nullptr, 0);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
RasterizerOpenGL::~RasterizerOpenGL() {}
|
RasterizerOpenGL::~RasterizerOpenGL() {
|
||||||
|
if (device.UseAssemblyShaders()) {
|
||||||
|
glDeleteBuffers(static_cast<GLsizei>(staging_cbufs.size()), staging_cbufs.data());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void RasterizerOpenGL::CheckExtensions() {
|
void RasterizerOpenGL::CheckExtensions() {
|
||||||
if (!GLAD_GL_ARB_texture_filter_anisotropic && !GLAD_GL_EXT_texture_filter_anisotropic) {
|
if (!GLAD_GL_ARB_texture_filter_anisotropic && !GLAD_GL_EXT_texture_filter_anisotropic) {
|
||||||
|
@ -230,6 +243,7 @@ GLintptr RasterizerOpenGL::SetupIndexBuffer() {
|
||||||
void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
|
void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
|
||||||
MICROPROFILE_SCOPE(OpenGL_Shader);
|
MICROPROFILE_SCOPE(OpenGL_Shader);
|
||||||
auto& gpu = system.GPU().Maxwell3D();
|
auto& gpu = system.GPU().Maxwell3D();
|
||||||
|
std::size_t num_ssbos = 0;
|
||||||
u32 clip_distances = 0;
|
u32 clip_distances = 0;
|
||||||
|
|
||||||
for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
|
for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
|
||||||
|
@ -261,6 +275,14 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
|
||||||
|
|
||||||
Shader shader{shader_cache.GetStageProgram(program)};
|
Shader shader{shader_cache.GetStageProgram(program)};
|
||||||
|
|
||||||
|
if (device.UseAssemblyShaders()) {
|
||||||
|
// Check for ARB limitation. We only have 16 SSBOs per context state. To workaround this
|
||||||
|
// all stages share the same bindings.
|
||||||
|
const std::size_t num_stage_ssbos = shader->GetEntries().global_memory_entries.size();
|
||||||
|
ASSERT_MSG(num_stage_ssbos == 0 || num_ssbos == 0, "SSBOs on more than one stage");
|
||||||
|
num_ssbos += num_stage_ssbos;
|
||||||
|
}
|
||||||
|
|
||||||
// Stage indices are 0 - 5
|
// Stage indices are 0 - 5
|
||||||
const std::size_t stage = index == 0 ? 0 : index - 1;
|
const std::size_t stage = index == 0 ? 0 : index - 1;
|
||||||
SetupDrawConstBuffers(stage, shader);
|
SetupDrawConstBuffers(stage, shader);
|
||||||
|
@ -526,6 +548,7 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
|
||||||
SyncFramebufferSRGB();
|
SyncFramebufferSRGB();
|
||||||
|
|
||||||
buffer_cache.Acquire();
|
buffer_cache.Acquire();
|
||||||
|
current_cbuf = 0;
|
||||||
|
|
||||||
std::size_t buffer_size = CalculateVertexArraysSize();
|
std::size_t buffer_size = CalculateVertexArraysSize();
|
||||||
|
|
||||||
|
@ -535,9 +558,9 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Uniform space for the 5 shader stages
|
// Uniform space for the 5 shader stages
|
||||||
buffer_size = Common::AlignUp<std::size_t>(buffer_size, 4) +
|
buffer_size =
|
||||||
(sizeof(GLShader::MaxwellUniformData) + device.GetUniformBufferAlignment()) *
|
Common::AlignUp<std::size_t>(buffer_size, 4) +
|
||||||
Maxwell::MaxShaderStage;
|
(sizeof(MaxwellUniformData) + device.GetUniformBufferAlignment()) * Maxwell::MaxShaderStage;
|
||||||
|
|
||||||
// Add space for at least 18 constant buffers
|
// Add space for at least 18 constant buffers
|
||||||
buffer_size += Maxwell::MaxConstBuffers *
|
buffer_size += Maxwell::MaxConstBuffers *
|
||||||
|
@ -558,12 +581,14 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Setup emulation uniform buffer.
|
// Setup emulation uniform buffer.
|
||||||
GLShader::MaxwellUniformData ubo;
|
if (!device.UseAssemblyShaders()) {
|
||||||
|
MaxwellUniformData ubo;
|
||||||
ubo.SetFromRegs(gpu);
|
ubo.SetFromRegs(gpu);
|
||||||
const auto [buffer, offset] =
|
const auto [buffer, offset] =
|
||||||
buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment());
|
buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment());
|
||||||
glBindBufferRange(GL_UNIFORM_BUFFER, EmulationUniformBlockBinding, buffer, offset,
|
glBindBufferRange(GL_UNIFORM_BUFFER, EmulationUniformBlockBinding, buffer, offset,
|
||||||
static_cast<GLsizeiptr>(sizeof(ubo)));
|
static_cast<GLsizeiptr>(sizeof(ubo)));
|
||||||
|
}
|
||||||
|
|
||||||
// Setup shaders and their used resources.
|
// Setup shaders and their used resources.
|
||||||
texture_cache.GuardSamplers(true);
|
texture_cache.GuardSamplers(true);
|
||||||
|
@ -635,11 +660,11 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
|
||||||
}
|
}
|
||||||
|
|
||||||
buffer_cache.Acquire();
|
buffer_cache.Acquire();
|
||||||
|
current_cbuf = 0;
|
||||||
|
|
||||||
auto kernel = shader_cache.GetComputeKernel(code_addr);
|
auto kernel = shader_cache.GetComputeKernel(code_addr);
|
||||||
SetupComputeTextures(kernel);
|
SetupComputeTextures(kernel);
|
||||||
SetupComputeImages(kernel);
|
SetupComputeImages(kernel);
|
||||||
program_manager.BindComputeShader(kernel->GetHandle());
|
|
||||||
|
|
||||||
const std::size_t buffer_size =
|
const std::size_t buffer_size =
|
||||||
Tegra::Engines::KeplerCompute::NumConstBuffers *
|
Tegra::Engines::KeplerCompute::NumConstBuffers *
|
||||||
|
@ -652,6 +677,7 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
|
||||||
buffer_cache.Unmap();
|
buffer_cache.Unmap();
|
||||||
|
|
||||||
const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
|
const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
|
||||||
|
program_manager.BindCompute(kernel->GetHandle());
|
||||||
glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z);
|
glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z);
|
||||||
++num_queued_commands;
|
++num_queued_commands;
|
||||||
}
|
}
|
||||||
|
@ -812,14 +838,20 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
|
||||||
}
|
}
|
||||||
|
|
||||||
void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, const Shader& shader) {
|
void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, const Shader& shader) {
|
||||||
|
static constexpr std::array PARAMETER_LUT = {
|
||||||
|
GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV, GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV,
|
||||||
|
GL_TESS_EVALUATION_PROGRAM_PARAMETER_BUFFER_NV, GL_GEOMETRY_PROGRAM_PARAMETER_BUFFER_NV,
|
||||||
|
GL_FRAGMENT_PROGRAM_PARAMETER_BUFFER_NV};
|
||||||
|
|
||||||
MICROPROFILE_SCOPE(OpenGL_UBO);
|
MICROPROFILE_SCOPE(OpenGL_UBO);
|
||||||
const auto& stages = system.GPU().Maxwell3D().state.shader_stages;
|
const auto& stages = system.GPU().Maxwell3D().state.shader_stages;
|
||||||
const auto& shader_stage = stages[stage_index];
|
const auto& shader_stage = stages[stage_index];
|
||||||
|
|
||||||
u32 binding = device.GetBaseBindings(stage_index).uniform_buffer;
|
u32 binding =
|
||||||
|
device.UseAssemblyShaders() ? 0 : device.GetBaseBindings(stage_index).uniform_buffer;
|
||||||
for (const auto& entry : shader->GetEntries().const_buffers) {
|
for (const auto& entry : shader->GetEntries().const_buffers) {
|
||||||
const auto& buffer = shader_stage.const_buffers[entry.GetIndex()];
|
const auto& buffer = shader_stage.const_buffers[entry.GetIndex()];
|
||||||
SetupConstBuffer(binding++, buffer, entry);
|
SetupConstBuffer(PARAMETER_LUT[stage_index], binding++, buffer, entry);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -835,16 +867,21 @@ void RasterizerOpenGL::SetupComputeConstBuffers(const Shader& kernel) {
|
||||||
buffer.address = config.Address();
|
buffer.address = config.Address();
|
||||||
buffer.size = config.size;
|
buffer.size = config.size;
|
||||||
buffer.enabled = mask[entry.GetIndex()];
|
buffer.enabled = mask[entry.GetIndex()];
|
||||||
SetupConstBuffer(binding++, buffer, entry);
|
SetupConstBuffer(GL_COMPUTE_PROGRAM_PARAMETER_BUFFER_NV, binding++, buffer, entry);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void RasterizerOpenGL::SetupConstBuffer(u32 binding, const Tegra::Engines::ConstBufferInfo& buffer,
|
void RasterizerOpenGL::SetupConstBuffer(GLenum stage, u32 binding,
|
||||||
|
const Tegra::Engines::ConstBufferInfo& buffer,
|
||||||
const ConstBufferEntry& entry) {
|
const ConstBufferEntry& entry) {
|
||||||
if (!buffer.enabled) {
|
if (!buffer.enabled) {
|
||||||
// Set values to zero to unbind buffers
|
// Set values to zero to unbind buffers
|
||||||
glBindBufferRange(GL_UNIFORM_BUFFER, binding, buffer_cache.GetEmptyBuffer(sizeof(float)), 0,
|
if (device.UseAssemblyShaders()) {
|
||||||
sizeof(float));
|
glBindBufferRangeNV(stage, entry.GetIndex(), 0, 0, 0);
|
||||||
|
} else {
|
||||||
|
glBindBufferRange(GL_UNIFORM_BUFFER, binding,
|
||||||
|
buffer_cache.GetEmptyBuffer(sizeof(float)), 0, sizeof(float));
|
||||||
|
}
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -853,9 +890,19 @@ void RasterizerOpenGL::SetupConstBuffer(u32 binding, const Tegra::Engines::Const
|
||||||
const std::size_t size = Common::AlignUp(GetConstBufferSize(buffer, entry), sizeof(GLvec4));
|
const std::size_t size = Common::AlignUp(GetConstBufferSize(buffer, entry), sizeof(GLvec4));
|
||||||
|
|
||||||
const auto alignment = device.GetUniformBufferAlignment();
|
const auto alignment = device.GetUniformBufferAlignment();
|
||||||
const auto [cbuf, offset] = buffer_cache.UploadMemory(buffer.address, size, alignment, false,
|
auto [cbuf, offset] = buffer_cache.UploadMemory(buffer.address, size, alignment, false,
|
||||||
device.HasFastBufferSubData());
|
device.HasFastBufferSubData());
|
||||||
|
if (!device.UseAssemblyShaders()) {
|
||||||
glBindBufferRange(GL_UNIFORM_BUFFER, binding, cbuf, offset, size);
|
glBindBufferRange(GL_UNIFORM_BUFFER, binding, cbuf, offset, size);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (offset != 0) {
|
||||||
|
const GLuint staging_cbuf = staging_cbufs[current_cbuf++];
|
||||||
|
glCopyNamedBufferSubData(cbuf, staging_cbuf, offset, 0, size);
|
||||||
|
cbuf = staging_cbuf;
|
||||||
|
offset = 0;
|
||||||
|
}
|
||||||
|
glBindBufferRangeNV(stage, binding, cbuf, offset, size);
|
||||||
}
|
}
|
||||||
|
|
||||||
void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, const Shader& shader) {
|
void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, const Shader& shader) {
|
||||||
|
@ -863,7 +910,8 @@ void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, const Shad
|
||||||
auto& memory_manager{gpu.MemoryManager()};
|
auto& memory_manager{gpu.MemoryManager()};
|
||||||
const auto cbufs{gpu.Maxwell3D().state.shader_stages[stage_index]};
|
const auto cbufs{gpu.Maxwell3D().state.shader_stages[stage_index]};
|
||||||
|
|
||||||
u32 binding = device.GetBaseBindings(stage_index).shader_storage_buffer;
|
u32 binding =
|
||||||
|
device.UseAssemblyShaders() ? 0 : device.GetBaseBindings(stage_index).shader_storage_buffer;
|
||||||
for (const auto& entry : shader->GetEntries().global_memory_entries) {
|
for (const auto& entry : shader->GetEntries().global_memory_entries) {
|
||||||
const GPUVAddr addr{cbufs.const_buffers[entry.cbuf_index].address + entry.cbuf_offset};
|
const GPUVAddr addr{cbufs.const_buffers[entry.cbuf_index].address + entry.cbuf_offset};
|
||||||
const GPUVAddr gpu_addr{memory_manager.Read<u64>(addr)};
|
const GPUVAddr gpu_addr{memory_manager.Read<u64>(addr)};
|
||||||
|
|
|
@ -56,8 +56,8 @@ struct DrawParameters;
|
||||||
class RasterizerOpenGL : public VideoCore::RasterizerAccelerated {
|
class RasterizerOpenGL : public VideoCore::RasterizerAccelerated {
|
||||||
public:
|
public:
|
||||||
explicit RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window,
|
explicit RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window,
|
||||||
ScreenInfo& info, GLShader::ProgramManager& program_manager,
|
const Device& device, ScreenInfo& info,
|
||||||
StateTracker& state_tracker);
|
ProgramManager& program_manager, StateTracker& state_tracker);
|
||||||
~RasterizerOpenGL() override;
|
~RasterizerOpenGL() override;
|
||||||
|
|
||||||
void Draw(bool is_indexed, bool is_instanced) override;
|
void Draw(bool is_indexed, bool is_instanced) override;
|
||||||
|
@ -106,7 +106,7 @@ private:
|
||||||
void SetupComputeConstBuffers(const Shader& kernel);
|
void SetupComputeConstBuffers(const Shader& kernel);
|
||||||
|
|
||||||
/// Configures a constant buffer.
|
/// Configures a constant buffer.
|
||||||
void SetupConstBuffer(u32 binding, const Tegra::Engines::ConstBufferInfo& buffer,
|
void SetupConstBuffer(GLenum stage, u32 binding, const Tegra::Engines::ConstBufferInfo& buffer,
|
||||||
const ConstBufferEntry& entry);
|
const ConstBufferEntry& entry);
|
||||||
|
|
||||||
/// Configures the current global memory entries to use for the draw command.
|
/// Configures the current global memory entries to use for the draw command.
|
||||||
|
@ -224,7 +224,7 @@ private:
|
||||||
|
|
||||||
void SetupShaders(GLenum primitive_mode);
|
void SetupShaders(GLenum primitive_mode);
|
||||||
|
|
||||||
const Device device;
|
const Device& device;
|
||||||
|
|
||||||
TextureCacheOpenGL texture_cache;
|
TextureCacheOpenGL texture_cache;
|
||||||
ShaderCacheOpenGL shader_cache;
|
ShaderCacheOpenGL shader_cache;
|
||||||
|
@ -236,7 +236,7 @@ private:
|
||||||
|
|
||||||
Core::System& system;
|
Core::System& system;
|
||||||
ScreenInfo& screen_info;
|
ScreenInfo& screen_info;
|
||||||
GLShader::ProgramManager& program_manager;
|
ProgramManager& program_manager;
|
||||||
StateTracker& state_tracker;
|
StateTracker& state_tracker;
|
||||||
|
|
||||||
static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024;
|
static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024;
|
||||||
|
@ -248,6 +248,12 @@ private:
|
||||||
std::bitset<Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers>
|
std::bitset<Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers>
|
||||||
enabled_transform_feedback_buffers;
|
enabled_transform_feedback_buffers;
|
||||||
|
|
||||||
|
static constexpr std::size_t NUM_CONSTANT_BUFFERS =
|
||||||
|
Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers *
|
||||||
|
Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram;
|
||||||
|
std::array<GLuint, NUM_CONSTANT_BUFFERS> staging_cbufs{};
|
||||||
|
std::size_t current_cbuf = 0;
|
||||||
|
|
||||||
/// Number of commands queued to the OpenGL driver. Reseted on flush.
|
/// Number of commands queued to the OpenGL driver. Reseted on flush.
|
||||||
std::size_t num_queued_commands = 0;
|
std::size_t num_queued_commands = 0;
|
||||||
|
|
||||||
|
|
|
@ -125,6 +125,15 @@ void OGLProgram::Release() {
|
||||||
handle = 0;
|
handle = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void OGLAssemblyProgram::Release() {
|
||||||
|
if (handle == 0) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
|
||||||
|
glDeleteProgramsARB(1, &handle);
|
||||||
|
handle = 0;
|
||||||
|
}
|
||||||
|
|
||||||
void OGLPipeline::Create() {
|
void OGLPipeline::Create() {
|
||||||
if (handle != 0)
|
if (handle != 0)
|
||||||
return;
|
return;
|
||||||
|
|
|
@ -167,6 +167,22 @@ public:
|
||||||
GLuint handle = 0;
|
GLuint handle = 0;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
class OGLAssemblyProgram : private NonCopyable {
|
||||||
|
public:
|
||||||
|
OGLAssemblyProgram() = default;
|
||||||
|
|
||||||
|
OGLAssemblyProgram(OGLAssemblyProgram&& o) noexcept : handle(std::exchange(o.handle, 0)) {}
|
||||||
|
|
||||||
|
~OGLAssemblyProgram() {
|
||||||
|
Release();
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Deletes the internal OpenGL resource
|
||||||
|
void Release();
|
||||||
|
|
||||||
|
GLuint handle = 0;
|
||||||
|
};
|
||||||
|
|
||||||
class OGLPipeline : private NonCopyable {
|
class OGLPipeline : private NonCopyable {
|
||||||
public:
|
public:
|
||||||
OGLPipeline() = default;
|
OGLPipeline() = default;
|
||||||
|
|
|
@ -97,6 +97,24 @@ constexpr ShaderType GetShaderType(Maxwell::ShaderProgram program_type) {
|
||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
constexpr GLenum AssemblyEnum(ShaderType shader_type) {
|
||||||
|
switch (shader_type) {
|
||||||
|
case ShaderType::Vertex:
|
||||||
|
return GL_VERTEX_PROGRAM_NV;
|
||||||
|
case ShaderType::TesselationControl:
|
||||||
|
return GL_TESS_CONTROL_PROGRAM_NV;
|
||||||
|
case ShaderType::TesselationEval:
|
||||||
|
return GL_TESS_EVALUATION_PROGRAM_NV;
|
||||||
|
case ShaderType::Geometry:
|
||||||
|
return GL_GEOMETRY_PROGRAM_NV;
|
||||||
|
case ShaderType::Fragment:
|
||||||
|
return GL_FRAGMENT_PROGRAM_NV;
|
||||||
|
case ShaderType::Compute:
|
||||||
|
return GL_COMPUTE_PROGRAM_NV;
|
||||||
|
}
|
||||||
|
return {};
|
||||||
|
}
|
||||||
|
|
||||||
std::string MakeShaderID(u64 unique_identifier, ShaderType shader_type) {
|
std::string MakeShaderID(u64 unique_identifier, ShaderType shader_type) {
|
||||||
return fmt::format("{}{:016X}", GetShaderTypeName(shader_type), unique_identifier);
|
return fmt::format("{}{:016X}", GetShaderTypeName(shader_type), unique_identifier);
|
||||||
}
|
}
|
||||||
|
@ -120,18 +138,43 @@ std::shared_ptr<Registry> MakeRegistry(const ShaderDiskCacheEntry& entry) {
|
||||||
return registry;
|
return registry;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::shared_ptr<OGLProgram> BuildShader(const Device& device, ShaderType shader_type,
|
ProgramSharedPtr BuildShader(const Device& device, ShaderType shader_type, u64 unique_identifier,
|
||||||
u64 unique_identifier, const ShaderIR& ir,
|
const ShaderIR& ir, const Registry& registry,
|
||||||
const Registry& registry, bool hint_retrievable = false) {
|
bool hint_retrievable = false) {
|
||||||
const std::string shader_id = MakeShaderID(unique_identifier, shader_type);
|
const std::string shader_id = MakeShaderID(unique_identifier, shader_type);
|
||||||
LOG_INFO(Render_OpenGL, "{}", shader_id);
|
LOG_INFO(Render_OpenGL, "{}", shader_id);
|
||||||
|
|
||||||
|
auto program = std::make_shared<ProgramHandle>();
|
||||||
|
|
||||||
|
if (device.UseAssemblyShaders()) {
|
||||||
|
const std::string arb = "Not implemented";
|
||||||
|
|
||||||
|
GLuint& arb_prog = program->assembly_program.handle;
|
||||||
|
|
||||||
|
// Commented out functions signal OpenGL errors but are compatible with apitrace.
|
||||||
|
// Use them only to capture and replay on apitrace.
|
||||||
|
#if 0
|
||||||
|
glGenProgramsNV(1, &arb_prog);
|
||||||
|
glLoadProgramNV(AssemblyEnum(shader_type), arb_prog, static_cast<GLsizei>(arb.size()),
|
||||||
|
reinterpret_cast<const GLubyte*>(arb.data()));
|
||||||
|
#else
|
||||||
|
glGenProgramsARB(1, &arb_prog);
|
||||||
|
glNamedProgramStringEXT(arb_prog, AssemblyEnum(shader_type), GL_PROGRAM_FORMAT_ASCII_ARB,
|
||||||
|
static_cast<GLsizei>(arb.size()), arb.data());
|
||||||
|
#endif
|
||||||
|
const auto err = reinterpret_cast<const char*>(glGetString(GL_PROGRAM_ERROR_STRING_NV));
|
||||||
|
if (err && *err) {
|
||||||
|
LOG_CRITICAL(Render_OpenGL, "{}", err);
|
||||||
|
LOG_INFO(Render_OpenGL, "\n{}", arb);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
const std::string glsl = DecompileShader(device, ir, registry, shader_type, shader_id);
|
const std::string glsl = DecompileShader(device, ir, registry, shader_type, shader_id);
|
||||||
OGLShader shader;
|
OGLShader shader;
|
||||||
shader.Create(glsl.c_str(), GetGLShaderType(shader_type));
|
shader.Create(glsl.c_str(), GetGLShaderType(shader_type));
|
||||||
|
|
||||||
auto program = std::make_shared<OGLProgram>();
|
program->source_program.Create(true, hint_retrievable, shader.handle);
|
||||||
program->Create(true, hint_retrievable, shader.handle);
|
}
|
||||||
|
|
||||||
return program;
|
return program;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -153,15 +196,22 @@ std::unordered_set<GLenum> GetSupportedFormats() {
|
||||||
|
|
||||||
CachedShader::CachedShader(VAddr cpu_addr, std::size_t size_in_bytes,
|
CachedShader::CachedShader(VAddr cpu_addr, std::size_t size_in_bytes,
|
||||||
std::shared_ptr<VideoCommon::Shader::Registry> registry,
|
std::shared_ptr<VideoCommon::Shader::Registry> registry,
|
||||||
ShaderEntries entries, std::shared_ptr<OGLProgram> program)
|
ShaderEntries entries, ProgramSharedPtr program_)
|
||||||
: RasterizerCacheObject{cpu_addr}, registry{std::move(registry)}, entries{std::move(entries)},
|
: RasterizerCacheObject{cpu_addr}, registry{std::move(registry)}, entries{std::move(entries)},
|
||||||
size_in_bytes{size_in_bytes}, program{std::move(program)} {}
|
size_in_bytes{size_in_bytes}, program{std::move(program_)} {
|
||||||
|
// Assign either the assembly program or source program. We can't have both.
|
||||||
|
handle = program->assembly_program.handle;
|
||||||
|
if (handle == 0) {
|
||||||
|
handle = program->source_program.handle;
|
||||||
|
}
|
||||||
|
ASSERT(handle != 0);
|
||||||
|
}
|
||||||
|
|
||||||
CachedShader::~CachedShader() = default;
|
CachedShader::~CachedShader() = default;
|
||||||
|
|
||||||
GLuint CachedShader::GetHandle() const {
|
GLuint CachedShader::GetHandle() const {
|
||||||
DEBUG_ASSERT(registry->IsConsistent());
|
DEBUG_ASSERT(registry->IsConsistent());
|
||||||
return program->handle;
|
return handle;
|
||||||
}
|
}
|
||||||
|
|
||||||
Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params,
|
Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params,
|
||||||
|
@ -239,7 +289,11 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
const std::vector gl_cache = disk_cache.LoadPrecompiled();
|
std::vector<ShaderDiskCachePrecompiled> gl_cache;
|
||||||
|
if (!device.UseAssemblyShaders()) {
|
||||||
|
// Only load precompiled cache when we are not using assembly shaders
|
||||||
|
gl_cache = disk_cache.LoadPrecompiled();
|
||||||
|
}
|
||||||
const auto supported_formats = GetSupportedFormats();
|
const auto supported_formats = GetSupportedFormats();
|
||||||
|
|
||||||
// Track if precompiled cache was altered during loading to know if we have to
|
// Track if precompiled cache was altered during loading to know if we have to
|
||||||
|
@ -278,7 +332,7 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
|
||||||
auto registry = MakeRegistry(entry);
|
auto registry = MakeRegistry(entry);
|
||||||
const ShaderIR ir(entry.code, main_offset, COMPILER_SETTINGS, *registry);
|
const ShaderIR ir(entry.code, main_offset, COMPILER_SETTINGS, *registry);
|
||||||
|
|
||||||
std::shared_ptr<OGLProgram> program;
|
ProgramSharedPtr program;
|
||||||
if (precompiled_entry) {
|
if (precompiled_entry) {
|
||||||
// If the shader is precompiled, attempt to load it with
|
// If the shader is precompiled, attempt to load it with
|
||||||
program = GeneratePrecompiledProgram(entry, *precompiled_entry, supported_formats);
|
program = GeneratePrecompiledProgram(entry, *precompiled_entry, supported_formats);
|
||||||
|
@ -332,6 +386,11 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (device.UseAssemblyShaders()) {
|
||||||
|
// Don't store precompiled binaries for assembly shaders.
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
// TODO(Rodrigo): Do state tracking for transferable shaders and do a dummy draw
|
// TODO(Rodrigo): Do state tracking for transferable shaders and do a dummy draw
|
||||||
// before precompiling them
|
// before precompiling them
|
||||||
|
|
||||||
|
@ -339,7 +398,7 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
|
||||||
const u64 id = (*transferable)[i].unique_identifier;
|
const u64 id = (*transferable)[i].unique_identifier;
|
||||||
const auto it = find_precompiled(id);
|
const auto it = find_precompiled(id);
|
||||||
if (it == gl_cache.end()) {
|
if (it == gl_cache.end()) {
|
||||||
const GLuint program = runtime_cache.at(id).program->handle;
|
const GLuint program = runtime_cache.at(id).program->source_program.handle;
|
||||||
disk_cache.SavePrecompiled(id, program);
|
disk_cache.SavePrecompiled(id, program);
|
||||||
precompiled_cache_altered = true;
|
precompiled_cache_altered = true;
|
||||||
}
|
}
|
||||||
|
@ -350,7 +409,7 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
std::shared_ptr<OGLProgram> ShaderCacheOpenGL::GeneratePrecompiledProgram(
|
ProgramSharedPtr ShaderCacheOpenGL::GeneratePrecompiledProgram(
|
||||||
const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry,
|
const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry,
|
||||||
const std::unordered_set<GLenum>& supported_formats) {
|
const std::unordered_set<GLenum>& supported_formats) {
|
||||||
if (supported_formats.find(precompiled_entry.binary_format) == supported_formats.end()) {
|
if (supported_formats.find(precompiled_entry.binary_format) == supported_formats.end()) {
|
||||||
|
@ -358,15 +417,15 @@ std::shared_ptr<OGLProgram> ShaderCacheOpenGL::GeneratePrecompiledProgram(
|
||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
auto program = std::make_shared<OGLProgram>();
|
auto program = std::make_shared<ProgramHandle>();
|
||||||
program->handle = glCreateProgram();
|
GLuint& handle = program->source_program.handle;
|
||||||
glProgramParameteri(program->handle, GL_PROGRAM_SEPARABLE, GL_TRUE);
|
handle = glCreateProgram();
|
||||||
glProgramBinary(program->handle, precompiled_entry.binary_format,
|
glProgramParameteri(handle, GL_PROGRAM_SEPARABLE, GL_TRUE);
|
||||||
precompiled_entry.binary.data(),
|
glProgramBinary(handle, precompiled_entry.binary_format, precompiled_entry.binary.data(),
|
||||||
static_cast<GLsizei>(precompiled_entry.binary.size()));
|
static_cast<GLsizei>(precompiled_entry.binary.size()));
|
||||||
|
|
||||||
GLint link_status;
|
GLint link_status;
|
||||||
glGetProgramiv(program->handle, GL_LINK_STATUS, &link_status);
|
glGetProgramiv(handle, GL_LINK_STATUS, &link_status);
|
||||||
if (link_status == GL_FALSE) {
|
if (link_status == GL_FALSE) {
|
||||||
LOG_INFO(Render_OpenGL, "Precompiled cache rejected by the driver, removing");
|
LOG_INFO(Render_OpenGL, "Precompiled cache rejected by the driver, removing");
|
||||||
return {};
|
return {};
|
||||||
|
|
|
@ -43,8 +43,14 @@ struct UnspecializedShader;
|
||||||
using Shader = std::shared_ptr<CachedShader>;
|
using Shader = std::shared_ptr<CachedShader>;
|
||||||
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
|
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
|
||||||
|
|
||||||
|
struct ProgramHandle {
|
||||||
|
OGLProgram source_program;
|
||||||
|
OGLAssemblyProgram assembly_program;
|
||||||
|
};
|
||||||
|
using ProgramSharedPtr = std::shared_ptr<ProgramHandle>;
|
||||||
|
|
||||||
struct PrecompiledShader {
|
struct PrecompiledShader {
|
||||||
std::shared_ptr<OGLProgram> program;
|
ProgramSharedPtr program;
|
||||||
std::shared_ptr<VideoCommon::Shader::Registry> registry;
|
std::shared_ptr<VideoCommon::Shader::Registry> registry;
|
||||||
ShaderEntries entries;
|
ShaderEntries entries;
|
||||||
};
|
};
|
||||||
|
@ -87,12 +93,13 @@ public:
|
||||||
private:
|
private:
|
||||||
explicit CachedShader(VAddr cpu_addr, std::size_t size_in_bytes,
|
explicit CachedShader(VAddr cpu_addr, std::size_t size_in_bytes,
|
||||||
std::shared_ptr<VideoCommon::Shader::Registry> registry,
|
std::shared_ptr<VideoCommon::Shader::Registry> registry,
|
||||||
ShaderEntries entries, std::shared_ptr<OGLProgram> program);
|
ShaderEntries entries, ProgramSharedPtr program);
|
||||||
|
|
||||||
std::shared_ptr<VideoCommon::Shader::Registry> registry;
|
std::shared_ptr<VideoCommon::Shader::Registry> registry;
|
||||||
ShaderEntries entries;
|
ShaderEntries entries;
|
||||||
std::size_t size_in_bytes = 0;
|
std::size_t size_in_bytes = 0;
|
||||||
std::shared_ptr<OGLProgram> program;
|
ProgramSharedPtr program;
|
||||||
|
GLuint handle = 0;
|
||||||
};
|
};
|
||||||
|
|
||||||
class ShaderCacheOpenGL final : public RasterizerCache<Shader> {
|
class ShaderCacheOpenGL final : public RasterizerCache<Shader> {
|
||||||
|
@ -115,7 +122,7 @@ protected:
|
||||||
void FlushObjectInner(const Shader& object) override {}
|
void FlushObjectInner(const Shader& object) override {}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
std::shared_ptr<OGLProgram> GeneratePrecompiledProgram(
|
ProgramSharedPtr GeneratePrecompiledProgram(
|
||||||
const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry,
|
const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry,
|
||||||
const std::unordered_set<GLenum>& supported_formats);
|
const std::unordered_set<GLenum>& supported_formats);
|
||||||
|
|
||||||
|
|
|
@ -6,47 +6,107 @@
|
||||||
|
|
||||||
#include "common/common_types.h"
|
#include "common/common_types.h"
|
||||||
#include "video_core/engines/maxwell_3d.h"
|
#include "video_core/engines/maxwell_3d.h"
|
||||||
|
#include "video_core/renderer_opengl/gl_device.h"
|
||||||
#include "video_core/renderer_opengl/gl_shader_manager.h"
|
#include "video_core/renderer_opengl/gl_shader_manager.h"
|
||||||
|
|
||||||
namespace OpenGL::GLShader {
|
namespace OpenGL {
|
||||||
|
|
||||||
ProgramManager::ProgramManager() = default;
|
ProgramManager::ProgramManager(const Device& device) {
|
||||||
|
use_assembly_programs = device.UseAssemblyShaders();
|
||||||
|
if (use_assembly_programs) {
|
||||||
|
glEnable(GL_COMPUTE_PROGRAM_NV);
|
||||||
|
} else {
|
||||||
|
graphics_pipeline.Create();
|
||||||
|
glBindProgramPipeline(graphics_pipeline.handle);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
ProgramManager::~ProgramManager() = default;
|
ProgramManager::~ProgramManager() = default;
|
||||||
|
|
||||||
void ProgramManager::Create() {
|
void ProgramManager::BindCompute(GLuint program) {
|
||||||
graphics_pipeline.Create();
|
if (use_assembly_programs) {
|
||||||
glBindProgramPipeline(graphics_pipeline.handle);
|
glBindProgramARB(GL_COMPUTE_PROGRAM_NV, program);
|
||||||
|
} else {
|
||||||
|
is_graphics_bound = false;
|
||||||
|
glUseProgram(program);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void ProgramManager::BindGraphicsPipeline() {
|
void ProgramManager::BindGraphicsPipeline() {
|
||||||
|
if (use_assembly_programs) {
|
||||||
|
UpdateAssemblyPrograms();
|
||||||
|
} else {
|
||||||
|
UpdateSourcePrograms();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void ProgramManager::BindHostPipeline(GLuint pipeline) {
|
||||||
|
if (use_assembly_programs) {
|
||||||
|
if (geometry_enabled) {
|
||||||
|
geometry_enabled = false;
|
||||||
|
old_state.geometry = 0;
|
||||||
|
glDisable(GL_GEOMETRY_PROGRAM_NV);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
glBindProgramPipeline(pipeline);
|
||||||
|
}
|
||||||
|
|
||||||
|
void ProgramManager::RestoreGuestPipeline() {
|
||||||
|
if (use_assembly_programs) {
|
||||||
|
glBindProgramPipeline(0);
|
||||||
|
} else {
|
||||||
|
glBindProgramPipeline(graphics_pipeline.handle);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void ProgramManager::UpdateAssemblyPrograms() {
|
||||||
|
const auto update_state = [](GLenum stage, bool& enabled, GLuint current, GLuint old) {
|
||||||
|
if (current == old) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (current == 0) {
|
||||||
|
if (enabled) {
|
||||||
|
enabled = false;
|
||||||
|
glDisable(stage);
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (!enabled) {
|
||||||
|
enabled = true;
|
||||||
|
glEnable(stage);
|
||||||
|
}
|
||||||
|
glBindProgramARB(stage, current);
|
||||||
|
};
|
||||||
|
|
||||||
|
update_state(GL_VERTEX_PROGRAM_NV, vertex_enabled, current_state.vertex, old_state.vertex);
|
||||||
|
update_state(GL_GEOMETRY_PROGRAM_NV, geometry_enabled, current_state.geometry,
|
||||||
|
old_state.geometry);
|
||||||
|
update_state(GL_FRAGMENT_PROGRAM_NV, fragment_enabled, current_state.fragment,
|
||||||
|
old_state.fragment);
|
||||||
|
|
||||||
|
old_state = current_state;
|
||||||
|
}
|
||||||
|
|
||||||
|
void ProgramManager::UpdateSourcePrograms() {
|
||||||
if (!is_graphics_bound) {
|
if (!is_graphics_bound) {
|
||||||
is_graphics_bound = true;
|
is_graphics_bound = true;
|
||||||
glUseProgram(0);
|
glUseProgram(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Avoid updating the pipeline when values have no changed
|
const GLuint handle = graphics_pipeline.handle;
|
||||||
if (old_state == current_state) {
|
const auto update_state = [handle](GLenum stage, GLuint current, GLuint old) {
|
||||||
|
if (current == old) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
glUseProgramStages(handle, stage, current);
|
||||||
// Workaround for AMD bug
|
};
|
||||||
static constexpr GLenum all_used_stages{GL_VERTEX_SHADER_BIT | GL_GEOMETRY_SHADER_BIT |
|
update_state(GL_VERTEX_SHADER_BIT, current_state.vertex, old_state.vertex);
|
||||||
GL_FRAGMENT_SHADER_BIT};
|
update_state(GL_GEOMETRY_SHADER_BIT, current_state.geometry, old_state.geometry);
|
||||||
const GLuint handle = graphics_pipeline.handle;
|
update_state(GL_FRAGMENT_SHADER_BIT, current_state.fragment, old_state.fragment);
|
||||||
glUseProgramStages(handle, all_used_stages, 0);
|
|
||||||
glUseProgramStages(handle, GL_VERTEX_SHADER_BIT, current_state.vertex_shader);
|
|
||||||
glUseProgramStages(handle, GL_GEOMETRY_SHADER_BIT, current_state.geometry_shader);
|
|
||||||
glUseProgramStages(handle, GL_FRAGMENT_SHADER_BIT, current_state.fragment_shader);
|
|
||||||
|
|
||||||
old_state = current_state;
|
old_state = current_state;
|
||||||
}
|
}
|
||||||
|
|
||||||
void ProgramManager::BindComputeShader(GLuint program) {
|
|
||||||
is_graphics_bound = false;
|
|
||||||
glUseProgram(program);
|
|
||||||
}
|
|
||||||
|
|
||||||
void MaxwellUniformData::SetFromRegs(const Tegra::Engines::Maxwell3D& maxwell) {
|
void MaxwellUniformData::SetFromRegs(const Tegra::Engines::Maxwell3D& maxwell) {
|
||||||
const auto& regs = maxwell.regs;
|
const auto& regs = maxwell.regs;
|
||||||
|
|
||||||
|
@ -54,4 +114,4 @@ void MaxwellUniformData::SetFromRegs(const Tegra::Engines::Maxwell3D& maxwell) {
|
||||||
y_direction = regs.screen_y_control.y_negate == 0 ? 1.0f : -1.0f;
|
y_direction = regs.screen_y_control.y_negate == 0 ? 1.0f : -1.0f;
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace OpenGL::GLShader
|
} // namespace OpenGL
|
||||||
|
|
|
@ -11,7 +11,9 @@
|
||||||
#include "video_core/renderer_opengl/gl_resource_manager.h"
|
#include "video_core/renderer_opengl/gl_resource_manager.h"
|
||||||
#include "video_core/renderer_opengl/maxwell_to_gl.h"
|
#include "video_core/renderer_opengl/maxwell_to_gl.h"
|
||||||
|
|
||||||
namespace OpenGL::GLShader {
|
namespace OpenGL {
|
||||||
|
|
||||||
|
class Device;
|
||||||
|
|
||||||
/// Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned
|
/// Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned
|
||||||
/// @note Always keep a vec4 at the end. The GL spec is not clear whether the alignment at
|
/// @note Always keep a vec4 at the end. The GL spec is not clear whether the alignment at
|
||||||
|
@ -28,50 +30,58 @@ static_assert(sizeof(MaxwellUniformData) < 16384,
|
||||||
|
|
||||||
class ProgramManager {
|
class ProgramManager {
|
||||||
public:
|
public:
|
||||||
explicit ProgramManager();
|
explicit ProgramManager(const Device& device);
|
||||||
~ProgramManager();
|
~ProgramManager();
|
||||||
|
|
||||||
void Create();
|
/// Binds a compute program
|
||||||
|
void BindCompute(GLuint program);
|
||||||
|
|
||||||
/// Updates the graphics pipeline and binds it.
|
/// Updates bound programs.
|
||||||
void BindGraphicsPipeline();
|
void BindGraphicsPipeline();
|
||||||
|
|
||||||
/// Binds a compute shader.
|
/// Binds an OpenGL pipeline object unsynchronized with the guest state.
|
||||||
void BindComputeShader(GLuint program);
|
void BindHostPipeline(GLuint pipeline);
|
||||||
|
|
||||||
|
/// Rewinds BindHostPipeline state changes.
|
||||||
|
void RestoreGuestPipeline();
|
||||||
|
|
||||||
void UseVertexShader(GLuint program) {
|
void UseVertexShader(GLuint program) {
|
||||||
current_state.vertex_shader = program;
|
current_state.vertex = program;
|
||||||
}
|
}
|
||||||
|
|
||||||
void UseGeometryShader(GLuint program) {
|
void UseGeometryShader(GLuint program) {
|
||||||
current_state.geometry_shader = program;
|
current_state.geometry = program;
|
||||||
}
|
}
|
||||||
|
|
||||||
void UseFragmentShader(GLuint program) {
|
void UseFragmentShader(GLuint program) {
|
||||||
current_state.fragment_shader = program;
|
current_state.fragment = program;
|
||||||
}
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
struct PipelineState {
|
struct PipelineState {
|
||||||
bool operator==(const PipelineState& rhs) const noexcept {
|
GLuint vertex = 0;
|
||||||
return vertex_shader == rhs.vertex_shader && fragment_shader == rhs.fragment_shader &&
|
GLuint geometry = 0;
|
||||||
geometry_shader == rhs.geometry_shader;
|
GLuint fragment = 0;
|
||||||
}
|
|
||||||
|
|
||||||
bool operator!=(const PipelineState& rhs) const noexcept {
|
|
||||||
return !operator==(rhs);
|
|
||||||
}
|
|
||||||
|
|
||||||
GLuint vertex_shader = 0;
|
|
||||||
GLuint fragment_shader = 0;
|
|
||||||
GLuint geometry_shader = 0;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/// Update NV_gpu_program5 programs.
|
||||||
|
void UpdateAssemblyPrograms();
|
||||||
|
|
||||||
|
/// Update GLSL programs.
|
||||||
|
void UpdateSourcePrograms();
|
||||||
|
|
||||||
OGLPipeline graphics_pipeline;
|
OGLPipeline graphics_pipeline;
|
||||||
OGLPipeline compute_pipeline;
|
|
||||||
PipelineState current_state;
|
PipelineState current_state;
|
||||||
PipelineState old_state;
|
PipelineState old_state;
|
||||||
|
|
||||||
|
bool use_assembly_programs = false;
|
||||||
|
|
||||||
bool is_graphics_bound = true;
|
bool is_graphics_bound = true;
|
||||||
|
|
||||||
|
bool vertex_enabled = false;
|
||||||
|
bool geometry_enabled = false;
|
||||||
|
bool fragment_enabled = false;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace OpenGL::GLShader
|
} // namespace OpenGL
|
||||||
|
|
|
@ -316,7 +316,7 @@ public:
|
||||||
RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system,
|
RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system,
|
||||||
Core::Frontend::GraphicsContext& context)
|
Core::Frontend::GraphicsContext& context)
|
||||||
: RendererBase{emu_window}, emu_window{emu_window}, system{system}, context{context},
|
: RendererBase{emu_window}, emu_window{emu_window}, system{system}, context{context},
|
||||||
has_debug_tool{HasDebugTool()} {}
|
program_manager{device}, has_debug_tool{HasDebugTool()} {}
|
||||||
|
|
||||||
RendererOpenGL::~RendererOpenGL() = default;
|
RendererOpenGL::~RendererOpenGL() = default;
|
||||||
|
|
||||||
|
@ -468,8 +468,9 @@ void RendererOpenGL::InitOpenGLObjects() {
|
||||||
vertex_program.Create(true, false, vertex_shader.handle);
|
vertex_program.Create(true, false, vertex_shader.handle);
|
||||||
fragment_program.Create(true, false, fragment_shader.handle);
|
fragment_program.Create(true, false, fragment_shader.handle);
|
||||||
|
|
||||||
// Create program pipeline
|
pipeline.Create();
|
||||||
program_manager.Create();
|
glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, vertex_program.handle);
|
||||||
|
glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, fragment_program.handle);
|
||||||
|
|
||||||
// Generate VBO handle for drawing
|
// Generate VBO handle for drawing
|
||||||
vertex_buffer.Create();
|
vertex_buffer.Create();
|
||||||
|
@ -508,7 +509,7 @@ void RendererOpenGL::CreateRasterizer() {
|
||||||
if (rasterizer) {
|
if (rasterizer) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
rasterizer = std::make_unique<RasterizerOpenGL>(system, emu_window, screen_info,
|
rasterizer = std::make_unique<RasterizerOpenGL>(system, emu_window, device, screen_info,
|
||||||
program_manager, state_tracker);
|
program_manager, state_tracker);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -620,10 +621,7 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
|
||||||
state_tracker.NotifyClipControl();
|
state_tracker.NotifyClipControl();
|
||||||
state_tracker.NotifyAlphaTest();
|
state_tracker.NotifyAlphaTest();
|
||||||
|
|
||||||
program_manager.UseVertexShader(vertex_program.handle);
|
program_manager.BindHostPipeline(pipeline.handle);
|
||||||
program_manager.UseGeometryShader(0);
|
|
||||||
program_manager.UseFragmentShader(fragment_program.handle);
|
|
||||||
program_manager.BindGraphicsPipeline();
|
|
||||||
|
|
||||||
glEnable(GL_CULL_FACE);
|
glEnable(GL_CULL_FACE);
|
||||||
if (screen_info.display_srgb) {
|
if (screen_info.display_srgb) {
|
||||||
|
@ -665,6 +663,8 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
|
||||||
|
|
||||||
glClear(GL_COLOR_BUFFER_BIT);
|
glClear(GL_COLOR_BUFFER_BIT);
|
||||||
glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
|
glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
|
||||||
|
|
||||||
|
program_manager.RestoreGuestPipeline();
|
||||||
}
|
}
|
||||||
|
|
||||||
bool RendererOpenGL::TryPresent(int timeout_ms) {
|
bool RendererOpenGL::TryPresent(int timeout_ms) {
|
||||||
|
|
|
@ -9,6 +9,7 @@
|
||||||
#include "common/common_types.h"
|
#include "common/common_types.h"
|
||||||
#include "common/math_util.h"
|
#include "common/math_util.h"
|
||||||
#include "video_core/renderer_base.h"
|
#include "video_core/renderer_base.h"
|
||||||
|
#include "video_core/renderer_opengl/gl_device.h"
|
||||||
#include "video_core/renderer_opengl/gl_resource_manager.h"
|
#include "video_core/renderer_opengl/gl_resource_manager.h"
|
||||||
#include "video_core/renderer_opengl/gl_shader_manager.h"
|
#include "video_core/renderer_opengl/gl_shader_manager.h"
|
||||||
#include "video_core/renderer_opengl/gl_state_tracker.h"
|
#include "video_core/renderer_opengl/gl_state_tracker.h"
|
||||||
|
@ -95,6 +96,7 @@ private:
|
||||||
Core::Frontend::EmuWindow& emu_window;
|
Core::Frontend::EmuWindow& emu_window;
|
||||||
Core::System& system;
|
Core::System& system;
|
||||||
Core::Frontend::GraphicsContext& context;
|
Core::Frontend::GraphicsContext& context;
|
||||||
|
const Device device;
|
||||||
|
|
||||||
StateTracker state_tracker{system};
|
StateTracker state_tracker{system};
|
||||||
|
|
||||||
|
@ -102,13 +104,14 @@ private:
|
||||||
OGLBuffer vertex_buffer;
|
OGLBuffer vertex_buffer;
|
||||||
OGLProgram vertex_program;
|
OGLProgram vertex_program;
|
||||||
OGLProgram fragment_program;
|
OGLProgram fragment_program;
|
||||||
|
OGLPipeline pipeline;
|
||||||
OGLFramebuffer screenshot_framebuffer;
|
OGLFramebuffer screenshot_framebuffer;
|
||||||
|
|
||||||
/// Display information for Switch screen
|
/// Display information for Switch screen
|
||||||
ScreenInfo screen_info;
|
ScreenInfo screen_info;
|
||||||
|
|
||||||
/// Global dummy shader pipeline
|
/// Global dummy shader pipeline
|
||||||
GLShader::ProgramManager program_manager;
|
ProgramManager program_manager;
|
||||||
|
|
||||||
/// OpenGL framebuffer data
|
/// OpenGL framebuffer data
|
||||||
std::vector<u8> gl_framebuffer_data;
|
std::vector<u8> gl_framebuffer_data;
|
||||||
|
|
|
@ -643,6 +643,8 @@ void Config::ReadRendererValues() {
|
||||||
Settings::values.use_asynchronous_gpu_emulation =
|
Settings::values.use_asynchronous_gpu_emulation =
|
||||||
ReadSetting(QStringLiteral("use_asynchronous_gpu_emulation"), false).toBool();
|
ReadSetting(QStringLiteral("use_asynchronous_gpu_emulation"), false).toBool();
|
||||||
Settings::values.use_vsync = ReadSetting(QStringLiteral("use_vsync"), true).toBool();
|
Settings::values.use_vsync = ReadSetting(QStringLiteral("use_vsync"), true).toBool();
|
||||||
|
Settings::values.use_assembly_shaders =
|
||||||
|
ReadSetting(QStringLiteral("use_assembly_shaders"), false).toBool();
|
||||||
Settings::values.use_fast_gpu_time =
|
Settings::values.use_fast_gpu_time =
|
||||||
ReadSetting(QStringLiteral("use_fast_gpu_time"), true).toBool();
|
ReadSetting(QStringLiteral("use_fast_gpu_time"), true).toBool();
|
||||||
Settings::values.force_30fps_mode =
|
Settings::values.force_30fps_mode =
|
||||||
|
@ -1090,6 +1092,8 @@ void Config::SaveRendererValues() {
|
||||||
WriteSetting(QStringLiteral("use_asynchronous_gpu_emulation"),
|
WriteSetting(QStringLiteral("use_asynchronous_gpu_emulation"),
|
||||||
Settings::values.use_asynchronous_gpu_emulation, false);
|
Settings::values.use_asynchronous_gpu_emulation, false);
|
||||||
WriteSetting(QStringLiteral("use_vsync"), Settings::values.use_vsync, true);
|
WriteSetting(QStringLiteral("use_vsync"), Settings::values.use_vsync, true);
|
||||||
|
WriteSetting(QStringLiteral("use_assembly_shaders"), Settings::values.use_assembly_shaders,
|
||||||
|
false);
|
||||||
WriteSetting(QStringLiteral("use_fast_gpu_time"), Settings::values.use_fast_gpu_time, true);
|
WriteSetting(QStringLiteral("use_fast_gpu_time"), Settings::values.use_fast_gpu_time, true);
|
||||||
WriteSetting(QStringLiteral("force_30fps_mode"), Settings::values.force_30fps_mode, false);
|
WriteSetting(QStringLiteral("force_30fps_mode"), Settings::values.force_30fps_mode, false);
|
||||||
|
|
||||||
|
|
|
@ -12,6 +12,9 @@ ConfigureGraphicsAdvanced::ConfigureGraphicsAdvanced(QWidget* parent)
|
||||||
|
|
||||||
ui->setupUi(this);
|
ui->setupUi(this);
|
||||||
|
|
||||||
|
// TODO: Remove this after assembly shaders are fully integrated
|
||||||
|
ui->use_assembly_shaders->setVisible(false);
|
||||||
|
|
||||||
SetConfiguration();
|
SetConfiguration();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -22,6 +25,8 @@ void ConfigureGraphicsAdvanced::SetConfiguration() {
|
||||||
ui->gpu_accuracy->setCurrentIndex(static_cast<int>(Settings::values.gpu_accuracy));
|
ui->gpu_accuracy->setCurrentIndex(static_cast<int>(Settings::values.gpu_accuracy));
|
||||||
ui->use_vsync->setEnabled(runtime_lock);
|
ui->use_vsync->setEnabled(runtime_lock);
|
||||||
ui->use_vsync->setChecked(Settings::values.use_vsync);
|
ui->use_vsync->setChecked(Settings::values.use_vsync);
|
||||||
|
ui->use_assembly_shaders->setEnabled(runtime_lock);
|
||||||
|
ui->use_assembly_shaders->setChecked(Settings::values.use_assembly_shaders);
|
||||||
ui->use_fast_gpu_time->setChecked(Settings::values.use_fast_gpu_time);
|
ui->use_fast_gpu_time->setChecked(Settings::values.use_fast_gpu_time);
|
||||||
ui->force_30fps_mode->setEnabled(runtime_lock);
|
ui->force_30fps_mode->setEnabled(runtime_lock);
|
||||||
ui->force_30fps_mode->setChecked(Settings::values.force_30fps_mode);
|
ui->force_30fps_mode->setChecked(Settings::values.force_30fps_mode);
|
||||||
|
@ -33,6 +38,7 @@ void ConfigureGraphicsAdvanced::ApplyConfiguration() {
|
||||||
auto gpu_accuracy = static_cast<Settings::GPUAccuracy>(ui->gpu_accuracy->currentIndex());
|
auto gpu_accuracy = static_cast<Settings::GPUAccuracy>(ui->gpu_accuracy->currentIndex());
|
||||||
Settings::values.gpu_accuracy = gpu_accuracy;
|
Settings::values.gpu_accuracy = gpu_accuracy;
|
||||||
Settings::values.use_vsync = ui->use_vsync->isChecked();
|
Settings::values.use_vsync = ui->use_vsync->isChecked();
|
||||||
|
Settings::values.use_assembly_shaders = ui->use_assembly_shaders->isChecked();
|
||||||
Settings::values.use_fast_gpu_time = ui->use_fast_gpu_time->isChecked();
|
Settings::values.use_fast_gpu_time = ui->use_fast_gpu_time->isChecked();
|
||||||
Settings::values.force_30fps_mode = ui->force_30fps_mode->isChecked();
|
Settings::values.force_30fps_mode = ui->force_30fps_mode->isChecked();
|
||||||
Settings::values.max_anisotropy = ui->anisotropic_filtering_combobox->currentIndex();
|
Settings::values.max_anisotropy = ui->anisotropic_filtering_combobox->currentIndex();
|
||||||
|
|
|
@ -62,6 +62,16 @@
|
||||||
</property>
|
</property>
|
||||||
</widget>
|
</widget>
|
||||||
</item>
|
</item>
|
||||||
|
<item>
|
||||||
|
<widget class="QCheckBox" name="use_assembly_shaders">
|
||||||
|
<property name="toolTip">
|
||||||
|
<string>Enabling this reduces shader stutter. Enables OpenGL assembly shaders on supported Nvidia devices (NV_gpu_program5 is required). This feature is experimental.</string>
|
||||||
|
</property>
|
||||||
|
<property name="text">
|
||||||
|
<string>Use assembly shaders (experimental, Nvidia OpenGL only)</string>
|
||||||
|
</property>
|
||||||
|
</widget>
|
||||||
|
</item>
|
||||||
<item>
|
<item>
|
||||||
<widget class="QCheckBox" name="force_30fps_mode">
|
<widget class="QCheckBox" name="force_30fps_mode">
|
||||||
<property name="text">
|
<property name="text">
|
||||||
|
|
|
@ -397,6 +397,8 @@ void Config::ReadValues() {
|
||||||
sdl2_config->GetBoolean("Renderer", "use_asynchronous_gpu_emulation", false);
|
sdl2_config->GetBoolean("Renderer", "use_asynchronous_gpu_emulation", false);
|
||||||
Settings::values.use_vsync =
|
Settings::values.use_vsync =
|
||||||
static_cast<u16>(sdl2_config->GetInteger("Renderer", "use_vsync", 1));
|
static_cast<u16>(sdl2_config->GetInteger("Renderer", "use_vsync", 1));
|
||||||
|
Settings::values.use_assembly_shaders =
|
||||||
|
sdl2_config->GetBoolean("Renderer", "use_assembly_shaders", false);
|
||||||
Settings::values.use_fast_gpu_time =
|
Settings::values.use_fast_gpu_time =
|
||||||
sdl2_config->GetBoolean("Renderer", "use_fast_gpu_time", true);
|
sdl2_config->GetBoolean("Renderer", "use_fast_gpu_time", true);
|
||||||
|
|
||||||
|
|
|
@ -134,6 +134,10 @@ max_anisotropy =
|
||||||
# 0 (default): Off, 1: On
|
# 0 (default): Off, 1: On
|
||||||
use_vsync =
|
use_vsync =
|
||||||
|
|
||||||
|
# Whether to use OpenGL assembly shaders or not. NV_gpu_program5 is required.
|
||||||
|
# 0 (default): Off, 1: On
|
||||||
|
use_assembly_shaders =
|
||||||
|
|
||||||
# Turns on the frame limiter, which will limit frames output to the target game speed
|
# Turns on the frame limiter, which will limit frames output to the target game speed
|
||||||
# 0: Off, 1: On (default)
|
# 0: Off, 1: On (default)
|
||||||
use_frame_limit =
|
use_frame_limit =
|
||||||
|
|
Loading…
Reference in a new issue