Merge pull request #10693 from liamwhite/f64-to-f32
shader_recompiler: translate f64 to f32 when unsupported on host
This commit is contained in:
commit
ad8f122ab1
|
@ -223,6 +223,7 @@ add_library(shader_recompiler STATIC
|
|||
ir_opt/identity_removal_pass.cpp
|
||||
ir_opt/layer_pass.cpp
|
||||
ir_opt/lower_fp16_to_fp32.cpp
|
||||
ir_opt/lower_fp64_to_fp32.cpp
|
||||
ir_opt/lower_int64_to_int32.cpp
|
||||
ir_opt/passes.h
|
||||
ir_opt/position_pass.cpp
|
||||
|
|
|
@ -280,6 +280,9 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
|
|||
RemoveUnreachableBlocks(program);
|
||||
|
||||
// Replace instructions before the SSA rewrite
|
||||
if (!host_info.support_float64) {
|
||||
Optimization::LowerFp64ToFp32(program);
|
||||
}
|
||||
if (!host_info.support_float16) {
|
||||
Optimization::LowerFp16ToFp32(program);
|
||||
}
|
||||
|
|
|
@ -10,6 +10,7 @@ namespace Shader {
|
|||
|
||||
/// Misc information about the host
|
||||
struct HostTranslateInfo {
|
||||
bool support_float64{}; ///< True when the device supports 64-bit floats
|
||||
bool support_float16{}; ///< True when the device supports 16-bit floats
|
||||
bool support_int64{}; ///< True when the device supports 64-bit integers
|
||||
bool needs_demote_reorder{}; ///< True when the device needs DemoteToHelperInvocation reordered
|
||||
|
|
185
src/shader_recompiler/ir_opt/lower_fp64_to_fp32.cpp
Normal file
185
src/shader_recompiler/ir_opt/lower_fp64_to_fp32.cpp
Normal file
|
@ -0,0 +1,185 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include "shader_recompiler/frontend/ir/ir_emitter.h"
|
||||
#include "shader_recompiler/frontend/ir/opcodes.h"
|
||||
#include "shader_recompiler/frontend/ir/value.h"
|
||||
#include "shader_recompiler/ir_opt/passes.h"
|
||||
|
||||
namespace Shader::Optimization {
|
||||
namespace {
|
||||
|
||||
// Exponent re-bias deltas between IEEE 754 binary64 (bias 1023) and binary32 (bias 127).
// A biased exponent field E encodes the power 2^(E - bias), so preserving the value needs
//   E32 = E64 - 1023 + 127   (narrowing)
//   E64 = E32 - 127 + 1023   (widening)
// Both constants are applied with IAdd, so each one carries the sign of its own direction.
constexpr s32 F64ToF32Exp = +127 - 1023;
constexpr s32 F32ToF64Exp = +1023 - 127;
|
||||
|
||||
/// Emits IR that narrows a 64-bit float, supplied as a packed pair of 32-bit words
/// (element 0 is the low word, element 1 the high word), into a 32-bit float.
///
/// The sign bit is copied, the top 23 of the 52 mantissa bits are kept (the rest are
/// dropped), and the 11-bit exponent is re-biased by adding F64ToF32Exp. An exponent field
/// of zero stays zero and an all-ones exponent field (0x7ff) becomes 0xff. Other exponents
/// are not range-checked before being placed in the 8-bit field.
///
/// @param ir     Emitter used to append the conversion instructions
/// @param packed Two-component value holding the raw 64-bit float bits
/// @return The assembled bit pattern, bit-cast to F32
IR::F32 PackedF64ToF32(IR::IREmitter& ir, const IR::Value& packed) {
    // Split the packed value into its two 32-bit halves.
    const IR::U32 low_word{ir.CompositeExtract(packed, 0)};
    const IR::U32 high_word{ir.CompositeExtract(packed, 1)};

    // Sign, exponent and the upper 20 mantissa bits all live in the high word.
    const IR::U32 sign_bit{ir.BitFieldExtract(high_word, ir.Imm32(31), ir.Imm32(1))};
    const IR::U32 exp64{ir.BitFieldExtract(high_word, ir.Imm32(20), ir.Imm32(11))};
    const IR::U32 frac_upper{ir.BitFieldExtract(high_word, ir.Imm32(0), ir.Imm32(20))};
    // Only the top three bits of the low word fit in a 23-bit mantissa.
    const IR::U32 frac_lower{ir.BitFieldExtract(low_word, ir.Imm32(29), ir.Imm32(3))};
    const auto frac_upper_shifted{ir.ShiftLeftLogical(frac_upper, ir.Imm32(3))};
    const IR::U32 frac32{ir.BitwiseOr(frac_upper_shifted, frac_lower)};

    // Re-bias the exponent, preserving the two special encodings (all zeros / all ones).
    const auto exp_is_zero{ir.IEqual(exp64, ir.Imm32(0))};
    const auto rebiased{ir.IAdd(exp64, ir.Imm32(F64ToF32Exp))};
    const IR::U32 exp_zero_kept{ir.Select(exp_is_zero, ir.Imm32(0), rebiased)};
    const auto exp_is_max{ir.IEqual(exp64, ir.Imm32(0x7ff))};
    const IR::U32 exp32{ir.Select(exp_is_max, ir.Imm32(0xff), exp_zero_kept)};

    // Assemble sign | exponent | mantissa into the 32-bit layout.
    const auto sign_field{ir.ShiftLeftLogical(sign_bit, ir.Imm32(31))};
    const auto exp_field{ir.ShiftLeftLogical(exp32, ir.Imm32(23))};
    const auto exp_and_frac{ir.BitwiseOr(exp_field, frac32)};
    const IR::U32 bits{ir.BitwiseOr(sign_field, exp_and_frac)};
    return ir.BitCast<IR::F32>(bits);
}
|
||||
|
||||
/// Emits IR that widens a 32-bit float into the raw bits of a 64-bit float, returned as a
/// packed pair of 32-bit words (element 0 is the low word, element 1 the high word).
///
/// The sign bit is copied, the 23 mantissa bits become the top 23 bits of the 52-bit
/// mantissa (remaining low bits are zero), and the 8-bit exponent is re-biased by adding
/// F32ToF64Exp. An exponent field of zero stays zero and an all-ones exponent field (0xff)
/// becomes 0x7ff.
///
/// @param ir  Emitter used to append the conversion instructions
/// @param raw Value that is converted to IR::F32 and reinterpreted as its bit pattern
/// @return Composite of (low word, high word)
IR::Value F32ToPackedF64(IR::IREmitter& ir, const IR::Value& raw) {
    const IR::U32 bits{ir.BitCast<IR::U32>(IR::F32(raw))};

    // Pull apart the three fields of the 32-bit layout.
    const IR::U32 sign_bit{ir.BitFieldExtract(bits, ir.Imm32(31), ir.Imm32(1))};
    const IR::U32 exp32{ir.BitFieldExtract(bits, ir.Imm32(23), ir.Imm32(8))};
    const IR::U32 frac32{ir.BitFieldExtract(bits, ir.Imm32(0), ir.Imm32(23))};

    // The upper 20 mantissa bits go in the high word, the lower 3 at the top of the low word.
    const IR::U32 frac_upper{ir.BitFieldExtract(frac32, ir.Imm32(3), ir.Imm32(20))};
    const IR::U32 frac_lower{ir.BitFieldExtract(frac32, ir.Imm32(0), ir.Imm32(3))};

    // Re-bias the exponent, preserving the two special encodings (all zeros / all ones).
    const auto exp_is_zero{ir.IEqual(exp32, ir.Imm32(0))};
    const auto rebiased{ir.IAdd(exp32, ir.Imm32(F32ToF64Exp))};
    const IR::U32 exp_zero_kept{ir.Select(exp_is_zero, ir.Imm32(0), rebiased)};
    const auto exp_is_max{ir.IEqual(exp32, ir.Imm32(0xff))};
    const IR::U32 exp64{ir.Select(exp_is_max, ir.Imm32(0x7ff), exp_zero_kept)};

    // Low word: the three leftover mantissa bits in positions 29..31.
    const IR::U32 low_word{ir.ShiftLeftLogical(frac_lower, ir.Imm32(29))};

    // High word: sign | exponent | upper mantissa.
    const auto sign_field{ir.ShiftLeftLogical(sign_bit, ir.Imm32(31))};
    const auto exp_field{ir.ShiftLeftLogical(exp64, ir.Imm32(20))};
    const auto exp_and_frac{ir.BitwiseOr(exp_field, frac_upper)};
    const IR::U32 high_word{ir.BitwiseOr(sign_field, exp_and_frac)};

    return ir.CompositeConstruct(low_word, high_word);
}
|
||||
|
||||
/// Maps a 64-bit floating-point opcode to the opcode that takes its place once 64-bit floats
/// are represented as 32-bit floats.
///
/// Conversions between F32 and F64 map to Identity. Any opcode without an entry below is
/// returned unchanged.
///
/// @param op Opcode of the instruction being lowered
/// @return The replacement opcode, or op itself when no replacement is listed
IR::Opcode Replace(IR::Opcode op) {
    using Op = IR::Opcode;
    // clang-format off
    switch (op) {
    // Arithmetic and rounding
    case Op::FPAbs64:       return Op::FPAbs32;
    case Op::FPAdd64:       return Op::FPAdd32;
    case Op::FPCeil64:      return Op::FPCeil32;
    case Op::FPFloor64:     return Op::FPFloor32;
    case Op::FPFma64:       return Op::FPFma32;
    case Op::FPMul64:       return Op::FPMul32;
    case Op::FPNeg64:       return Op::FPNeg32;
    case Op::FPRoundEven64: return Op::FPRoundEven32;
    case Op::FPSaturate64:  return Op::FPSaturate32;
    case Op::FPClamp64:     return Op::FPClamp32;
    case Op::FPTrunc64:     return Op::FPTrunc32;

    // Composite construction, extraction and insertion
    case Op::CompositeConstructF64x2: return Op::CompositeConstructF32x2;
    case Op::CompositeConstructF64x3: return Op::CompositeConstructF32x3;
    case Op::CompositeConstructF64x4: return Op::CompositeConstructF32x4;
    case Op::CompositeExtractF64x2:   return Op::CompositeExtractF32x2;
    case Op::CompositeExtractF64x3:   return Op::CompositeExtractF32x3;
    case Op::CompositeExtractF64x4:   return Op::CompositeExtractF32x4;
    case Op::CompositeInsertF64x2:    return Op::CompositeInsertF32x2;
    case Op::CompositeInsertF64x3:    return Op::CompositeInsertF32x3;
    case Op::CompositeInsertF64x4:    return Op::CompositeInsertF32x4;

    // Comparisons
    case Op::FPOrdEqual64:              return Op::FPOrdEqual32;
    case Op::FPUnordEqual64:            return Op::FPUnordEqual32;
    case Op::FPOrdNotEqual64:           return Op::FPOrdNotEqual32;
    case Op::FPUnordNotEqual64:         return Op::FPUnordNotEqual32;
    case Op::FPOrdLessThan64:           return Op::FPOrdLessThan32;
    case Op::FPUnordLessThan64:         return Op::FPUnordLessThan32;
    case Op::FPOrdGreaterThan64:        return Op::FPOrdGreaterThan32;
    case Op::FPUnordGreaterThan64:      return Op::FPUnordGreaterThan32;
    case Op::FPOrdLessThanEqual64:      return Op::FPOrdLessThanEqual32;
    case Op::FPUnordLessThanEqual64:    return Op::FPUnordLessThanEqual32;
    case Op::FPOrdGreaterThanEqual64:   return Op::FPOrdGreaterThanEqual32;
    case Op::FPUnordGreaterThanEqual64: return Op::FPUnordGreaterThanEqual32;
    case Op::FPIsNan64:                 return Op::FPIsNan32;

    // Float -> integer conversions
    case Op::ConvertS16F64: return Op::ConvertS16F32;
    case Op::ConvertS32F64: return Op::ConvertS32F32;
    case Op::ConvertS64F64: return Op::ConvertS64F32;
    case Op::ConvertU16F64: return Op::ConvertU16F32;
    case Op::ConvertU32F64: return Op::ConvertU32F32;
    case Op::ConvertU64F64: return Op::ConvertU64F32;

    // Float <-> float conversions: both sides become F32, so these turn into Identity
    case Op::ConvertF32F64:
    case Op::ConvertF64F32:
        return Op::Identity;

    // Integer -> float conversions
    case Op::ConvertF64S8:  return Op::ConvertF32S8;
    case Op::ConvertF64S16: return Op::ConvertF32S16;
    case Op::ConvertF64S32: return Op::ConvertF32S32;
    case Op::ConvertF64S64: return Op::ConvertF32S64;
    case Op::ConvertF64U8:  return Op::ConvertF32U8;
    case Op::ConvertF64U16: return Op::ConvertF32U16;
    case Op::ConvertF64U32: return Op::ConvertF32U32;
    case Op::ConvertF64U64: return Op::ConvertF32U64;

    default:
        return op;
    }
    // clang-format on
}
|
||||
|
||||
/// Lowers a single instruction.
///
/// PackDouble2x32 and UnpackDouble2x32 have their uses replaced by the result of a bitwise
/// conversion sequence built from argument 0, using an emitter positioned at the instruction.
/// Every other instruction only has its opcode swapped for whatever Replace returns.
///
/// @param block Block that owns inst
/// @param inst  Instruction to lower
void Lower(IR::Block& block, IR::Inst& inst) {
    const IR::Opcode opcode{inst.GetOpcode()};
    const bool is_pack{opcode == IR::Opcode::PackDouble2x32};
    const bool is_unpack{opcode == IR::Opcode::UnpackDouble2x32};

    if (!is_pack && !is_unpack) {
        // Plain opcode substitution; opcodes without a listed replacement are left as they are.
        inst.ReplaceOpcode(Replace(opcode));
        return;
    }

    IR::IREmitter ir(block, IR::Block::InstructionList::s_iterator_to(inst));
    if (is_pack) {
        // Packed 2x32 words -> F32 standing in for the double
        inst.ReplaceUsesWith(PackedF64ToF32(ir, inst.Arg(0)));
    } else {
        // F32 standing in for the double -> packed 2x32 words
        inst.ReplaceUsesWith(F32ToPackedF64(ir, inst.Arg(0)));
    }
}
|
||||
|
||||
} // Anonymous namespace
|
||||
|
||||
/// Runs the 64-bit to 32-bit float lowering over every instruction of every block in the
/// program, in block order.
///
/// @param program Program whose instructions are rewritten in place
void LowerFp64ToFp32(IR::Program& program) {
    for (IR::Block* const current : program.blocks) {
        auto&& instructions = current->Instructions();
        for (auto it = instructions.begin(); it != instructions.end(); ++it) {
            Lower(*current, *it);
        }
    }
}
|
||||
|
||||
} // namespace Shader::Optimization
|
|
@ -17,6 +17,7 @@ void ConstantPropagationPass(Environment& env, IR::Program& program);
|
|||
void DeadCodeEliminationPass(IR::Program& program);
|
||||
void GlobalMemoryToStorageBufferPass(IR::Program& program);
|
||||
void IdentityRemovalPass(IR::Program& program);
|
||||
void LowerFp64ToFp32(IR::Program& program);
|
||||
void LowerFp16ToFp32(IR::Program& program);
|
||||
void LowerInt64ToInt32(IR::Program& program);
|
||||
void RescalingPass(IR::Program& program);
|
||||
|
|
|
@ -232,6 +232,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo
|
|||
.gl_max_compute_smem_size = device.GetMaxComputeSharedMemorySize(),
|
||||
},
|
||||
host_info{
|
||||
.support_float64 = true,
|
||||
.support_float16 = false,
|
||||
.support_int64 = device.HasShaderInt64(),
|
||||
.needs_demote_reorder = device.IsAmd(),
|
||||
|
|
|
@ -350,6 +350,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device
|
|||
.has_broken_spirv_subgroup_mask_vector_extract_dynamic =
|
||||
driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY};
|
||||
host_info = Shader::HostTranslateInfo{
|
||||
.support_float64 = device.IsFloat64Supported(),
|
||||
.support_float16 = device.IsFloat16Supported(),
|
||||
.support_int64 = device.IsShaderInt64Supported(),
|
||||
.needs_demote_reorder =
|
||||
|
|
|
@ -300,6 +300,11 @@ public:
|
|||
return GetDriverID() != VK_DRIVER_ID_QUALCOMM_PROPRIETARY;
|
||||
}
|
||||
|
||||
/// Returns true if the device supports float64 natively.
|
||||
bool IsFloat64Supported() const {
|
||||
return features.features.shaderFloat64;
|
||||
}
|
||||
|
||||
/// Returns true if the device supports float16 natively.
|
||||
bool IsFloat16Supported() const {
|
||||
return features.shader_float16_int8.shaderFloat16;
|
||||
|
|
Loading…
Reference in a new issue