Merge pull request #3312 from ReinUsesLisp/atoms-u32
shader/memory: Implement ATOMS.ADD.U32
This commit is contained in:
commit
15163edaaa
|
@ -215,6 +215,18 @@ enum class F2fRoundingOp : u64 {
|
||||||
Trunc = 11,
|
Trunc = 11,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/// Operation selector of an atomic instruction.
/// The value is read from a 4-bit `operation` instruction field, so the numeric values
/// below are part of the decoding contract.
/// NOTE(review): values presumably mirror the hardware encoding - confirm before renumbering.
enum class AtomicOp : u64 {
    Add = 0,
    Min = 1,
    Max = 2,
    Inc = 3,
    Dec = 4,
    And = 5,
    Or = 6,
    Xor = 7,
    Exch = 8,
};
|
||||||
|
|
||||||
enum class UniformType : u64 {
|
enum class UniformType : u64 {
|
||||||
UnsignedByte = 0,
|
UnsignedByte = 0,
|
||||||
SignedByte = 1,
|
SignedByte = 1,
|
||||||
|
@ -236,6 +248,13 @@ enum class StoreType : u64 {
|
||||||
Bits128 = 6,
|
Bits128 = 6,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/// Operand type selector of an atomic instruction.
/// The value is read from a 2-bit `type` instruction field, which the four enumerators
/// below fully cover.
/// NOTE(review): names suggest unsigned/signed 32- and 64-bit operands - confirm against
/// the ISA documentation.
enum class AtomicType : u64 {
    U32 = 0,
    S32 = 1,
    U64 = 2,
    S64 = 3,
};
|
||||||
|
|
||||||
enum class IMinMaxExchange : u64 {
|
enum class IMinMaxExchange : u64 {
|
||||||
None = 0,
|
None = 0,
|
||||||
XLo = 1,
|
XLo = 1,
|
||||||
|
@ -938,6 +957,16 @@ union Instruction {
|
||||||
BitField<46, 2, u64> cache_mode;
|
BitField<46, 2, u64> cache_mode;
|
||||||
} stg;
|
} stg;
|
||||||
|
|
||||||
|
union {
|
||||||
|
BitField<52, 4, AtomicOp> operation;
|
||||||
|
BitField<28, 2, AtomicType> type;
|
||||||
|
BitField<30, 22, s64> offset;
|
||||||
|
|
||||||
|
s32 GetImmediateOffset() const {
|
||||||
|
return static_cast<s32>(offset << 2);
|
||||||
|
}
|
||||||
|
} atoms;
|
||||||
|
|
||||||
union {
|
union {
|
||||||
BitField<32, 1, PhysicalAttributeDirection> direction;
|
BitField<32, 1, PhysicalAttributeDirection> direction;
|
||||||
BitField<47, 3, AttributeSize> size;
|
BitField<47, 3, AttributeSize> size;
|
||||||
|
@ -1659,9 +1688,10 @@ public:
|
||||||
ST_A,
|
ST_A,
|
||||||
ST_L,
|
ST_L,
|
||||||
ST_S,
|
ST_S,
|
||||||
ST, // Store in generic memory
|
ST, // Store in generic memory
|
||||||
STG, // Store in global memory
|
STG, // Store in global memory
|
||||||
AL2P, // Transforms attribute memory into physical memory
|
ATOMS, // Atomic operation on shared memory
|
||||||
|
AL2P, // Transforms attribute memory into physical memory
|
||||||
TEX,
|
TEX,
|
||||||
TEX_B, // Texture Load Bindless
|
TEX_B, // Texture Load Bindless
|
||||||
TXQ, // Texture Query
|
TXQ, // Texture Query
|
||||||
|
@ -1964,6 +1994,7 @@ private:
|
||||||
INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"),
|
INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"),
|
||||||
INST("101-------------", Id::ST, Type::Memory, "ST"),
|
INST("101-------------", Id::ST, Type::Memory, "ST"),
|
||||||
INST("1110111011011---", Id::STG, Type::Memory, "STG"),
|
INST("1110111011011---", Id::STG, Type::Memory, "STG"),
|
||||||
|
INST("11101100--------", Id::ATOMS, Type::Memory, "ATOMS"),
|
||||||
INST("1110111110100---", Id::AL2P, Type::Memory, "AL2P"),
|
INST("1110111110100---", Id::AL2P, Type::Memory, "AL2P"),
|
||||||
INST("110000----111---", Id::TEX, Type::Texture, "TEX"),
|
INST("110000----111---", Id::TEX, Type::Texture, "TEX"),
|
||||||
INST("1101111010111---", Id::TEX_B, Type::Texture, "TEX_B"),
|
INST("1101111010111---", Id::TEX_B, Type::Texture, "TEX_B"),
|
||||||
|
|
|
@ -1856,6 +1856,16 @@ private:
|
||||||
Type::Uint};
|
Type::Uint};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <const std::string_view& opname, Type type>
|
||||||
|
Expression Atomic(Operation operation) {
|
||||||
|
ASSERT(stage == ShaderType::Compute);
|
||||||
|
auto& smem = std::get<SmemNode>(*operation[0]);
|
||||||
|
|
||||||
|
return {fmt::format("atomic{}(smem[{} >> 2], {})", opname, Visit(smem.GetAddress()).AsInt(),
|
||||||
|
Visit(operation[1]).As(type)),
|
||||||
|
type};
|
||||||
|
}
|
||||||
|
|
||||||
Expression Branch(Operation operation) {
|
Expression Branch(Operation operation) {
|
||||||
const auto target = std::get_if<ImmediateNode>(&*operation[0]);
|
const auto target = std::get_if<ImmediateNode>(&*operation[0]);
|
||||||
UNIMPLEMENTED_IF(!target);
|
UNIMPLEMENTED_IF(!target);
|
||||||
|
@ -2194,6 +2204,8 @@ private:
|
||||||
&GLSLDecompiler::AtomicImage<Func::Xor>,
|
&GLSLDecompiler::AtomicImage<Func::Xor>,
|
||||||
&GLSLDecompiler::AtomicImage<Func::Exchange>,
|
&GLSLDecompiler::AtomicImage<Func::Exchange>,
|
||||||
|
|
||||||
|
&GLSLDecompiler::Atomic<Func::Add, Type::Uint>,
|
||||||
|
|
||||||
&GLSLDecompiler::Branch,
|
&GLSLDecompiler::Branch,
|
||||||
&GLSLDecompiler::BranchIndirect,
|
&GLSLDecompiler::BranchIndirect,
|
||||||
&GLSLDecompiler::PushFlowStack,
|
&GLSLDecompiler::PushFlowStack,
|
||||||
|
|
|
@ -1796,6 +1796,11 @@ private:
|
||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Placeholder handler for OperationCode::UAtomicAdd in the SPIR-V decompiler.
/// Reports the operation as unimplemented and returns a default-constructed Expression;
/// the operation argument is ignored.
Expression UAtomicAdd(Operation) {
    UNIMPLEMENTED();
    return {};
}
|
||||||
|
|
||||||
Expression Branch(Operation operation) {
|
Expression Branch(Operation operation) {
|
||||||
const auto& target = std::get<ImmediateNode>(*operation[0]);
|
const auto& target = std::get<ImmediateNode>(*operation[0]);
|
||||||
OpStore(jmp_to, Constant(t_uint, target.GetValue()));
|
OpStore(jmp_to, Constant(t_uint, target.GetValue()));
|
||||||
|
@ -2373,6 +2378,8 @@ private:
|
||||||
&SPIRVDecompiler::AtomicImageXor,
|
&SPIRVDecompiler::AtomicImageXor,
|
||||||
&SPIRVDecompiler::AtomicImageExchange,
|
&SPIRVDecompiler::AtomicImageExchange,
|
||||||
|
|
||||||
|
&SPIRVDecompiler::UAtomicAdd,
|
||||||
|
|
||||||
&SPIRVDecompiler::Branch,
|
&SPIRVDecompiler::Branch,
|
||||||
&SPIRVDecompiler::BranchIndirect,
|
&SPIRVDecompiler::BranchIndirect,
|
||||||
&SPIRVDecompiler::PushFlowStack,
|
&SPIRVDecompiler::PushFlowStack,
|
||||||
|
|
|
@ -16,6 +16,8 @@
|
||||||
|
|
||||||
namespace VideoCommon::Shader {
|
namespace VideoCommon::Shader {
|
||||||
|
|
||||||
|
using Tegra::Shader::AtomicOp;
|
||||||
|
using Tegra::Shader::AtomicType;
|
||||||
using Tegra::Shader::Attribute;
|
using Tegra::Shader::Attribute;
|
||||||
using Tegra::Shader::Instruction;
|
using Tegra::Shader::Instruction;
|
||||||
using Tegra::Shader::OpCode;
|
using Tegra::Shader::OpCode;
|
||||||
|
@ -333,6 +335,23 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
case OpCode::Id::ATOMS: {
    // Only the Add operation on U32 operands is decoded; any other combination is reported
    // as unimplemented.
    UNIMPLEMENTED_IF_MSG(instr.atoms.operation != AtomicOp::Add, "operation={}",
                         static_cast<int>(instr.atoms.operation.Value()));
    UNIMPLEMENTED_IF_MSG(instr.atoms.type != AtomicType::U32, "type={}",
                         static_cast<int>(instr.atoms.type.Value()));

    // Effective address = gpr8 + immediate offset.
    const s32 imm_offset = instr.atoms.GetImmediateOffset();
    Node base = GetRegister(instr.gpr8);
    Node effective_address =
        Operation(OperationCode::IAdd, std::move(base), Immediate(imm_offset));

    // Operands of the atomic: the shared memory location and the value held in gpr20.
    Node target = GetSharedMemory(std::move(effective_address));
    Node operand = GetRegister(instr.gpr20);

    // The result of the atomic add is written to gpr0.
    SetRegister(bb, instr.gpr0,
                Operation(OperationCode::UAtomicAdd, std::move(target), std::move(operand)));
    break;
}
|
||||||
case OpCode::Id::AL2P: {
|
case OpCode::Id::AL2P: {
|
||||||
// Ignore al2p.direction since we don't care about it.
|
// Ignore al2p.direction since we don't care about it.
|
||||||
|
|
||||||
|
|
|
@ -162,6 +162,8 @@ enum class OperationCode {
|
||||||
AtomicImageXor, /// (MetaImage, int[N] coords) -> void
|
AtomicImageXor, /// (MetaImage, int[N] coords) -> void
|
||||||
AtomicImageExchange, /// (MetaImage, int[N] coords) -> void
|
AtomicImageExchange, /// (MetaImage, int[N] coords) -> void
|
||||||
|
|
||||||
|
UAtomicAdd, /// (smem, uint) -> uint
|
||||||
|
|
||||||
Branch, /// (uint branch_target) -> void
|
Branch, /// (uint branch_target) -> void
|
||||||
BranchIndirect, /// (uint branch_target) -> void
|
BranchIndirect, /// (uint branch_target) -> void
|
||||||
PushFlowStack, /// (uint branch_target) -> void
|
PushFlowStack, /// (uint branch_target) -> void
|
||||||
|
|
Loading…
Reference in a new issue