shader: Make typed IR

This commit is contained in:
ReinUsesLisp 2021-02-05 19:19:36 -03:00 committed by ameerj
parent dc04a50ac2
commit be94ee88d2
19 changed files with 494 additions and 268 deletions

View file

@ -32,16 +32,16 @@ U32 IREmitter::Imm32(s32 value) const {
return U32{Value{static_cast<u32>(value)}}; return U32{Value{static_cast<u32>(value)}};
} }
U32 IREmitter::Imm32(f32 value) const { F32 IREmitter::Imm32(f32 value) const {
return U32{Value{Common::BitCast<u32>(value)}}; return F32{Value{value}};
} }
U64 IREmitter::Imm64(u64 value) const { U64 IREmitter::Imm64(u64 value) const {
return U64{Value{value}}; return U64{Value{value}};
} }
U64 IREmitter::Imm64(f64 value) const { F64 IREmitter::Imm64(f64 value) const {
return U64{Value{Common::BitCast<u64>(value)}}; return F64{Value{value}};
} }
void IREmitter::Branch(IR::Block* label) { void IREmitter::Branch(IR::Block* label) {
@ -121,11 +121,11 @@ void IREmitter::SetOFlag(const U1& value) {
Inst(Opcode::SetOFlag, value); Inst(Opcode::SetOFlag, value);
} }
U32 IREmitter::GetAttribute(IR::Attribute attribute) { F32 IREmitter::GetAttribute(IR::Attribute attribute) {
return Inst<U32>(Opcode::GetAttribute, attribute); return Inst<F32>(Opcode::GetAttribute, attribute);
} }
void IREmitter::SetAttribute(IR::Attribute attribute, const U32& value) { void IREmitter::SetAttribute(IR::Attribute attribute, const F32& value) {
Inst(Opcode::SetAttribute, attribute, value); Inst(Opcode::SetAttribute, attribute, value);
} }
@ -225,50 +225,113 @@ U1 IREmitter::GetOverflowFromOp(const Value& op) {
return Inst<U1>(Opcode::GetOverflowFromOp, op); return Inst<U1>(Opcode::GetOverflowFromOp, op);
} }
U16U32U64 IREmitter::FPAdd(const U16U32U64& a, const U16U32U64& b, FpControl control) { F16F32F64 IREmitter::FPAdd(const F16F32F64& a, const F16F32F64& b, FpControl control) {
if (a.Type() != a.Type()) { if (a.Type() != a.Type()) {
throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type()); throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type());
} }
switch (a.Type()) { switch (a.Type()) {
case Type::U16: case Type::F16:
return Inst<U16>(Opcode::FPAdd16, Flags{control}, a, b); return Inst<F16>(Opcode::FPAdd16, Flags{control}, a, b);
case Type::U32: case Type::F32:
return Inst<U32>(Opcode::FPAdd32, Flags{control}, a, b); return Inst<F32>(Opcode::FPAdd32, Flags{control}, a, b);
case Type::U64: case Type::F64:
return Inst<U64>(Opcode::FPAdd64, Flags{control}, a, b); return Inst<F64>(Opcode::FPAdd64, Flags{control}, a, b);
default: default:
ThrowInvalidType(a.Type()); ThrowInvalidType(a.Type());
} }
} }
Value IREmitter::CompositeConstruct(const UAny& e1, const UAny& e2) { Value IREmitter::CompositeConstruct(const Value& e1, const Value& e2) {
if (e1.Type() != e2.Type()) { if (e1.Type() != e2.Type()) {
throw InvalidArgument("Mismatching types {} and {}", e1.Type(), e2.Type()); throw InvalidArgument("Mismatching types {} and {}", e1.Type(), e2.Type());
} }
return Inst(Opcode::CompositeConstruct2, e1, e2); switch (e1.Type()) {
case Type::U32:
return Inst(Opcode::CompositeConstructU32x2, e1, e2);
case Type::F16:
return Inst(Opcode::CompositeConstructF16x2, e1, e2);
case Type::F32:
return Inst(Opcode::CompositeConstructF32x2, e1, e2);
case Type::F64:
return Inst(Opcode::CompositeConstructF64x2, e1, e2);
default:
ThrowInvalidType(e1.Type());
}
} }
Value IREmitter::CompositeConstruct(const UAny& e1, const UAny& e2, const UAny& e3) { Value IREmitter::CompositeConstruct(const Value& e1, const Value& e2, const Value& e3) {
if (e1.Type() != e2.Type() || e1.Type() != e3.Type()) { if (e1.Type() != e2.Type() || e1.Type() != e3.Type()) {
throw InvalidArgument("Mismatching types {}, {}, and {}", e1.Type(), e2.Type(), e3.Type()); throw InvalidArgument("Mismatching types {}, {}, and {}", e1.Type(), e2.Type(), e3.Type());
} }
return Inst(Opcode::CompositeConstruct3, e1, e2, e3); switch (e1.Type()) {
case Type::U32:
return Inst(Opcode::CompositeConstructU32x3, e1, e2, e3);
case Type::F16:
return Inst(Opcode::CompositeConstructF16x3, e1, e2, e3);
case Type::F32:
return Inst(Opcode::CompositeConstructF32x3, e1, e2, e3);
case Type::F64:
return Inst(Opcode::CompositeConstructF64x3, e1, e2, e3);
default:
ThrowInvalidType(e1.Type());
}
} }
Value IREmitter::CompositeConstruct(const UAny& e1, const UAny& e2, const UAny& e3, Value IREmitter::CompositeConstruct(const Value& e1, const Value& e2, const Value& e3,
const UAny& e4) { const Value& e4) {
if (e1.Type() != e2.Type() || e1.Type() != e3.Type() || e1.Type() != e4.Type()) { if (e1.Type() != e2.Type() || e1.Type() != e3.Type() || e1.Type() != e4.Type()) {
throw InvalidArgument("Mismatching types {}, {}, {}, and {}", e1.Type(), e2.Type(), throw InvalidArgument("Mismatching types {}, {}, {}, and {}", e1.Type(), e2.Type(),
e3.Type(), e4.Type()); e3.Type(), e4.Type());
} }
return Inst(Opcode::CompositeConstruct4, e1, e2, e3, e4); switch (e1.Type()) {
case Type::U32:
return Inst(Opcode::CompositeConstructU32x4, e1, e2, e3, e4);
case Type::F16:
return Inst(Opcode::CompositeConstructF16x4, e1, e2, e3, e4);
case Type::F32:
return Inst(Opcode::CompositeConstructF32x4, e1, e2, e3, e4);
case Type::F64:
return Inst(Opcode::CompositeConstructF64x4, e1, e2, e3, e4);
default:
ThrowInvalidType(e1.Type());
}
} }
UAny IREmitter::CompositeExtract(const Value& vector, size_t element) { Value IREmitter::CompositeExtract(const Value& vector, size_t element) {
if (element >= 4) { const auto read = [&](Opcode opcode, size_t limit) -> Value {
if (element >= limit) {
throw InvalidArgument("Out of bounds element {}", element); throw InvalidArgument("Out of bounds element {}", element);
} }
return Inst<UAny>(Opcode::CompositeExtract, vector, Imm32(static_cast<u32>(element))); return Inst(opcode, vector, Value{static_cast<u32>(element)});
};
switch (vector.Type()) {
case Type::U32x2:
return read(Opcode::CompositeExtractU32x2, 2);
case Type::U32x3:
return read(Opcode::CompositeExtractU32x3, 3);
case Type::U32x4:
return read(Opcode::CompositeExtractU32x4, 4);
case Type::F16x2:
return read(Opcode::CompositeExtractF16x2, 2);
case Type::F16x3:
return read(Opcode::CompositeExtractF16x3, 3);
case Type::F16x4:
return read(Opcode::CompositeExtractF16x4, 4);
case Type::F32x2:
return read(Opcode::CompositeExtractF32x2, 2);
case Type::F32x3:
return read(Opcode::CompositeExtractF32x3, 3);
case Type::F32x4:
return read(Opcode::CompositeExtractF32x4, 4);
case Type::F64x2:
return read(Opcode::CompositeExtractF64x2, 2);
case Type::F64x3:
return read(Opcode::CompositeExtractF64x3, 3);
case Type::F64x4:
return read(Opcode::CompositeExtractF64x4, 4);
default:
ThrowInvalidType(vector.Type());
}
} }
UAny IREmitter::Select(const U1& condition, const UAny& true_value, const UAny& false_value) { UAny IREmitter::Select(const U1& condition, const UAny& true_value, const UAny& false_value) {
@ -289,6 +352,36 @@ UAny IREmitter::Select(const U1& condition, const UAny& true_value, const UAny&
} }
} }
template <>
IR::U32 IREmitter::BitCast<IR::U32, IR::F32>(const IR::F32& value) {
return Inst<IR::U32>(Opcode::BitCastU32F32, value);
}
template <>
IR::F32 IREmitter::BitCast<IR::F32, IR::U32>(const IR::U32& value) {
return Inst<IR::F32>(Opcode::BitCastF32U32, value);
}
template <>
IR::U16 IREmitter::BitCast<IR::U16, IR::F16>(const IR::F16& value) {
return Inst<IR::U16>(Opcode::BitCastU16F16, value);
}
template <>
IR::F16 IREmitter::BitCast<IR::F16, IR::U16>(const IR::U16& value) {
return Inst<IR::F16>(Opcode::BitCastF16U16, value);
}
template <>
IR::U64 IREmitter::BitCast<IR::U64, IR::F64>(const IR::F64& value) {
return Inst<IR::U64>(Opcode::BitCastU64F64, value);
}
template <>
IR::F64 IREmitter::BitCast<IR::F64, IR::U64>(const IR::U64& value) {
return Inst<IR::F64>(Opcode::BitCastF64U64, value);
}
U64 IREmitter::PackUint2x32(const Value& vector) { U64 IREmitter::PackUint2x32(const Value& vector) {
return Inst<U64>(Opcode::PackUint2x32, vector); return Inst<U64>(Opcode::PackUint2x32, vector);
} }
@ -305,75 +398,75 @@ Value IREmitter::UnpackFloat2x16(const U32& value) {
return Inst<Value>(Opcode::UnpackFloat2x16, value); return Inst<Value>(Opcode::UnpackFloat2x16, value);
} }
U64 IREmitter::PackDouble2x32(const Value& vector) { F64 IREmitter::PackDouble2x32(const Value& vector) {
return Inst<U64>(Opcode::PackDouble2x32, vector); return Inst<F64>(Opcode::PackDouble2x32, vector);
} }
Value IREmitter::UnpackDouble2x32(const U64& value) { Value IREmitter::UnpackDouble2x32(const F64& value) {
return Inst<Value>(Opcode::UnpackDouble2x32, value); return Inst<Value>(Opcode::UnpackDouble2x32, value);
} }
U16U32U64 IREmitter::FPMul(const U16U32U64& a, const U16U32U64& b, FpControl control) { F16F32F64 IREmitter::FPMul(const F16F32F64& a, const F16F32F64& b, FpControl control) {
if (a.Type() != b.Type()) { if (a.Type() != b.Type()) {
throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type()); throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type());
} }
switch (a.Type()) { switch (a.Type()) {
case Type::U16: case Type::F16:
return Inst<U16>(Opcode::FPMul16, Flags{control}, a, b); return Inst<F16>(Opcode::FPMul16, Flags{control}, a, b);
case Type::U32: case Type::F32:
return Inst<U32>(Opcode::FPMul32, Flags{control}, a, b); return Inst<F32>(Opcode::FPMul32, Flags{control}, a, b);
case Type::U64: case Type::F64:
return Inst<U64>(Opcode::FPMul64, Flags{control}, a, b); return Inst<F64>(Opcode::FPMul64, Flags{control}, a, b);
default: default:
ThrowInvalidType(a.Type()); ThrowInvalidType(a.Type());
} }
} }
U16U32U64 IREmitter::FPFma(const U16U32U64& a, const U16U32U64& b, const U16U32U64& c, F16F32F64 IREmitter::FPFma(const F16F32F64& a, const F16F32F64& b, const F16F32F64& c,
FpControl control) { FpControl control) {
if (a.Type() != b.Type() || a.Type() != c.Type()) { if (a.Type() != b.Type() || a.Type() != c.Type()) {
throw InvalidArgument("Mismatching types {}, {}, and {}", a.Type(), b.Type(), c.Type()); throw InvalidArgument("Mismatching types {}, {}, and {}", a.Type(), b.Type(), c.Type());
} }
switch (a.Type()) { switch (a.Type()) {
case Type::U16: case Type::F16:
return Inst<U16>(Opcode::FPFma16, Flags{control}, a, b, c); return Inst<F16>(Opcode::FPFma16, Flags{control}, a, b, c);
case Type::U32: case Type::F32:
return Inst<U32>(Opcode::FPFma32, Flags{control}, a, b, c); return Inst<F32>(Opcode::FPFma32, Flags{control}, a, b, c);
case Type::U64: case Type::F64:
return Inst<U64>(Opcode::FPFma64, Flags{control}, a, b, c); return Inst<F64>(Opcode::FPFma64, Flags{control}, a, b, c);
default: default:
ThrowInvalidType(a.Type()); ThrowInvalidType(a.Type());
} }
} }
U16U32U64 IREmitter::FPAbs(const U16U32U64& value) { F16F32F64 IREmitter::FPAbs(const F16F32F64& value) {
switch (value.Type()) { switch (value.Type()) {
case Type::U16: case Type::U16:
return Inst<U16>(Opcode::FPAbs16, value); return Inst<F16>(Opcode::FPAbs16, value);
case Type::U32: case Type::U32:
return Inst<U32>(Opcode::FPAbs32, value); return Inst<F32>(Opcode::FPAbs32, value);
case Type::U64: case Type::U64:
return Inst<U64>(Opcode::FPAbs64, value); return Inst<F64>(Opcode::FPAbs64, value);
default: default:
ThrowInvalidType(value.Type()); ThrowInvalidType(value.Type());
} }
} }
U16U32U64 IREmitter::FPNeg(const U16U32U64& value) { F16F32F64 IREmitter::FPNeg(const F16F32F64& value) {
switch (value.Type()) { switch (value.Type()) {
case Type::U16: case Type::U16:
return Inst<U16>(Opcode::FPNeg16, value); return Inst<F16>(Opcode::FPNeg16, value);
case Type::U32: case Type::U32:
return Inst<U32>(Opcode::FPNeg32, value); return Inst<F32>(Opcode::FPNeg32, value);
case Type::U64: case Type::U64:
return Inst<U64>(Opcode::FPNeg64, value); return Inst<F64>(Opcode::FPNeg64, value);
default: default:
ThrowInvalidType(value.Type()); ThrowInvalidType(value.Type());
} }
} }
U16U32U64 IREmitter::FPAbsNeg(const U16U32U64& value, bool abs, bool neg) { F16F32F64 IREmitter::FPAbsNeg(const F16F32F64& value, bool abs, bool neg) {
U16U32U64 result{value}; F16F32F64 result{value};
if (abs) { if (abs) {
result = FPAbs(value); result = FPAbs(value);
} }
@ -383,108 +476,108 @@ U16U32U64 IREmitter::FPAbsNeg(const U16U32U64& value, bool abs, bool neg) {
return result; return result;
} }
U32 IREmitter::FPCosNotReduced(const U32& value) { F32 IREmitter::FPCosNotReduced(const F32& value) {
return Inst<U32>(Opcode::FPCosNotReduced, value); return Inst<F32>(Opcode::FPCosNotReduced, value);
} }
U32 IREmitter::FPExp2NotReduced(const U32& value) { F32 IREmitter::FPExp2NotReduced(const F32& value) {
return Inst<U32>(Opcode::FPExp2NotReduced, value); return Inst<F32>(Opcode::FPExp2NotReduced, value);
} }
U32 IREmitter::FPLog2(const U32& value) { F32 IREmitter::FPLog2(const F32& value) {
return Inst<U32>(Opcode::FPLog2, value); return Inst<F32>(Opcode::FPLog2, value);
} }
U32U64 IREmitter::FPRecip(const U32U64& value) { F32F64 IREmitter::FPRecip(const F32F64& value) {
switch (value.Type()) { switch (value.Type()) {
case Type::U32: case Type::U32:
return Inst<U32>(Opcode::FPRecip32, value); return Inst<F32>(Opcode::FPRecip32, value);
case Type::U64: case Type::U64:
return Inst<U64>(Opcode::FPRecip64, value); return Inst<F64>(Opcode::FPRecip64, value);
default: default:
ThrowInvalidType(value.Type()); ThrowInvalidType(value.Type());
} }
} }
U32U64 IREmitter::FPRecipSqrt(const U32U64& value) { F32F64 IREmitter::FPRecipSqrt(const F32F64& value) {
switch (value.Type()) { switch (value.Type()) {
case Type::U32: case Type::U32:
return Inst<U32>(Opcode::FPRecipSqrt32, value); return Inst<F32>(Opcode::FPRecipSqrt32, value);
case Type::U64: case Type::U64:
return Inst<U64>(Opcode::FPRecipSqrt64, value); return Inst<F64>(Opcode::FPRecipSqrt64, value);
default: default:
ThrowInvalidType(value.Type()); ThrowInvalidType(value.Type());
} }
} }
U32 IREmitter::FPSinNotReduced(const U32& value) { F32 IREmitter::FPSinNotReduced(const F32& value) {
return Inst<U32>(Opcode::FPSinNotReduced, value); return Inst<F32>(Opcode::FPSinNotReduced, value);
} }
U32 IREmitter::FPSqrt(const U32& value) { F32 IREmitter::FPSqrt(const F32& value) {
return Inst<U32>(Opcode::FPSqrt, value); return Inst<F32>(Opcode::FPSqrt, value);
} }
U16U32U64 IREmitter::FPSaturate(const U16U32U64& value) { F16F32F64 IREmitter::FPSaturate(const F16F32F64& value) {
switch (value.Type()) { switch (value.Type()) {
case Type::U16: case Type::U16:
return Inst<U16>(Opcode::FPSaturate16, value); return Inst<F16>(Opcode::FPSaturate16, value);
case Type::U32: case Type::U32:
return Inst<U32>(Opcode::FPSaturate32, value); return Inst<F32>(Opcode::FPSaturate32, value);
case Type::U64: case Type::U64:
return Inst<U64>(Opcode::FPSaturate64, value); return Inst<F64>(Opcode::FPSaturate64, value);
default: default:
ThrowInvalidType(value.Type()); ThrowInvalidType(value.Type());
} }
} }
U16U32U64 IREmitter::FPRoundEven(const U16U32U64& value) { F16F32F64 IREmitter::FPRoundEven(const F16F32F64& value) {
switch (value.Type()) { switch (value.Type()) {
case Type::U16: case Type::U16:
return Inst<U16>(Opcode::FPRoundEven16, value); return Inst<F16>(Opcode::FPRoundEven16, value);
case Type::U32: case Type::U32:
return Inst<U32>(Opcode::FPRoundEven32, value); return Inst<F32>(Opcode::FPRoundEven32, value);
case Type::U64: case Type::U64:
return Inst<U64>(Opcode::FPRoundEven64, value); return Inst<F64>(Opcode::FPRoundEven64, value);
default: default:
ThrowInvalidType(value.Type()); ThrowInvalidType(value.Type());
} }
} }
U16U32U64 IREmitter::FPFloor(const U16U32U64& value) { F16F32F64 IREmitter::FPFloor(const F16F32F64& value) {
switch (value.Type()) { switch (value.Type()) {
case Type::U16: case Type::U16:
return Inst<U16>(Opcode::FPFloor16, value); return Inst<F16>(Opcode::FPFloor16, value);
case Type::U32: case Type::U32:
return Inst<U32>(Opcode::FPFloor32, value); return Inst<F32>(Opcode::FPFloor32, value);
case Type::U64: case Type::U64:
return Inst<U64>(Opcode::FPFloor64, value); return Inst<F64>(Opcode::FPFloor64, value);
default: default:
ThrowInvalidType(value.Type()); ThrowInvalidType(value.Type());
} }
} }
U16U32U64 IREmitter::FPCeil(const U16U32U64& value) { F16F32F64 IREmitter::FPCeil(const F16F32F64& value) {
switch (value.Type()) { switch (value.Type()) {
case Type::U16: case Type::U16:
return Inst<U16>(Opcode::FPCeil16, value); return Inst<F16>(Opcode::FPCeil16, value);
case Type::U32: case Type::U32:
return Inst<U32>(Opcode::FPCeil32, value); return Inst<F32>(Opcode::FPCeil32, value);
case Type::U64: case Type::U64:
return Inst<U64>(Opcode::FPCeil64, value); return Inst<F64>(Opcode::FPCeil64, value);
default: default:
ThrowInvalidType(value.Type()); ThrowInvalidType(value.Type());
} }
} }
U16U32U64 IREmitter::FPTrunc(const U16U32U64& value) { F16F32F64 IREmitter::FPTrunc(const F16F32F64& value) {
switch (value.Type()) { switch (value.Type()) {
case Type::U16: case Type::U16:
return Inst<U16>(Opcode::FPTrunc16, value); return Inst<F16>(Opcode::FPTrunc16, value);
case Type::U32: case Type::U32:
return Inst<U32>(Opcode::FPTrunc32, value); return Inst<F32>(Opcode::FPTrunc32, value);
case Type::U64: case Type::U64:
return Inst<U64>(Opcode::FPTrunc64, value); return Inst<F64>(Opcode::FPTrunc64, value);
default: default:
ThrowInvalidType(value.Type()); ThrowInvalidType(value.Type());
} }
@ -605,7 +698,7 @@ U1 IREmitter::LogicalNot(const U1& value) {
return Inst<U1>(Opcode::LogicalNot, value); return Inst<U1>(Opcode::LogicalNot, value);
} }
U32U64 IREmitter::ConvertFToS(size_t bitsize, const U16U32U64& value) { U32U64 IREmitter::ConvertFToS(size_t bitsize, const F16F32F64& value) {
switch (bitsize) { switch (bitsize) {
case 16: case 16:
switch (value.Type()) { switch (value.Type()) {
@ -645,7 +738,7 @@ U32U64 IREmitter::ConvertFToS(size_t bitsize, const U16U32U64& value) {
} }
} }
U32U64 IREmitter::ConvertFToU(size_t bitsize, const U16U32U64& value) { U32U64 IREmitter::ConvertFToU(size_t bitsize, const F16F32F64& value) {
switch (bitsize) { switch (bitsize) {
case 16: case 16:
switch (value.Type()) { switch (value.Type()) {
@ -685,7 +778,7 @@ U32U64 IREmitter::ConvertFToU(size_t bitsize, const U16U32U64& value) {
} }
} }
U32U64 IREmitter::ConvertFToI(size_t bitsize, bool is_signed, const U16U32U64& value) { U32U64 IREmitter::ConvertFToI(size_t bitsize, bool is_signed, const F16F32F64& value) {
if (is_signed) { if (is_signed) {
return ConvertFToS(bitsize, value); return ConvertFToS(bitsize, value);
} else { } else {

View file

@ -27,9 +27,9 @@ public:
[[nodiscard]] U16 Imm16(u16 value) const; [[nodiscard]] U16 Imm16(u16 value) const;
[[nodiscard]] U32 Imm32(u32 value) const; [[nodiscard]] U32 Imm32(u32 value) const;
[[nodiscard]] U32 Imm32(s32 value) const; [[nodiscard]] U32 Imm32(s32 value) const;
[[nodiscard]] U32 Imm32(f32 value) const; [[nodiscard]] F32 Imm32(f32 value) const;
[[nodiscard]] U64 Imm64(u64 value) const; [[nodiscard]] U64 Imm64(u64 value) const;
[[nodiscard]] U64 Imm64(f64 value) const; [[nodiscard]] F64 Imm64(f64 value) const;
void Branch(IR::Block* label); void Branch(IR::Block* label);
void BranchConditional(const U1& cond, IR::Block* true_label, IR::Block* false_label); void BranchConditional(const U1& cond, IR::Block* true_label, IR::Block* false_label);
@ -55,8 +55,8 @@ public:
void SetCFlag(const U1& value); void SetCFlag(const U1& value);
void SetOFlag(const U1& value); void SetOFlag(const U1& value);
[[nodiscard]] U32 GetAttribute(IR::Attribute attribute); [[nodiscard]] F32 GetAttribute(IR::Attribute attribute);
void SetAttribute(IR::Attribute attribute, const U32& value); void SetAttribute(IR::Attribute attribute, const F32& value);
[[nodiscard]] U32 WorkgroupIdX(); [[nodiscard]] U32 WorkgroupIdX();
[[nodiscard]] U32 WorkgroupIdY(); [[nodiscard]] U32 WorkgroupIdY();
@ -87,44 +87,47 @@ public:
[[nodiscard]] U1 GetCarryFromOp(const Value& op); [[nodiscard]] U1 GetCarryFromOp(const Value& op);
[[nodiscard]] U1 GetOverflowFromOp(const Value& op); [[nodiscard]] U1 GetOverflowFromOp(const Value& op);
[[nodiscard]] Value CompositeConstruct(const UAny& e1, const UAny& e2); [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2);
[[nodiscard]] Value CompositeConstruct(const UAny& e1, const UAny& e2, const UAny& e3); [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2, const Value& e3);
[[nodiscard]] Value CompositeConstruct(const UAny& e1, const UAny& e2, const UAny& e3, [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2, const Value& e3,
const UAny& e4); const Value& e4);
[[nodiscard]] UAny CompositeExtract(const Value& vector, size_t element); [[nodiscard]] Value CompositeExtract(const Value& vector, size_t element);
[[nodiscard]] UAny Select(const U1& condition, const UAny& true_value, const UAny& false_value); [[nodiscard]] UAny Select(const U1& condition, const UAny& true_value, const UAny& false_value);
template <typename Dest, typename Source>
[[nodiscard]] Dest BitCast(const Source& value);
[[nodiscard]] U64 PackUint2x32(const Value& vector); [[nodiscard]] U64 PackUint2x32(const Value& vector);
[[nodiscard]] Value UnpackUint2x32(const U64& value); [[nodiscard]] Value UnpackUint2x32(const U64& value);
[[nodiscard]] U32 PackFloat2x16(const Value& vector); [[nodiscard]] U32 PackFloat2x16(const Value& vector);
[[nodiscard]] Value UnpackFloat2x16(const U32& value); [[nodiscard]] Value UnpackFloat2x16(const U32& value);
[[nodiscard]] U64 PackDouble2x32(const Value& vector); [[nodiscard]] F64 PackDouble2x32(const Value& vector);
[[nodiscard]] Value UnpackDouble2x32(const U64& value); [[nodiscard]] Value UnpackDouble2x32(const F64& value);
[[nodiscard]] U16U32U64 FPAdd(const U16U32U64& a, const U16U32U64& b, FpControl control = {}); [[nodiscard]] F16F32F64 FPAdd(const F16F32F64& a, const F16F32F64& b, FpControl control = {});
[[nodiscard]] U16U32U64 FPMul(const U16U32U64& a, const U16U32U64& b, FpControl control = {}); [[nodiscard]] F16F32F64 FPMul(const F16F32F64& a, const F16F32F64& b, FpControl control = {});
[[nodiscard]] U16U32U64 FPFma(const U16U32U64& a, const U16U32U64& b, const U16U32U64& c, [[nodiscard]] F16F32F64 FPFma(const F16F32F64& a, const F16F32F64& b, const F16F32F64& c,
FpControl control = {}); FpControl control = {});
[[nodiscard]] U16U32U64 FPAbs(const U16U32U64& value); [[nodiscard]] F16F32F64 FPAbs(const F16F32F64& value);
[[nodiscard]] U16U32U64 FPNeg(const U16U32U64& value); [[nodiscard]] F16F32F64 FPNeg(const F16F32F64& value);
[[nodiscard]] U16U32U64 FPAbsNeg(const U16U32U64& value, bool abs, bool neg); [[nodiscard]] F16F32F64 FPAbsNeg(const F16F32F64& value, bool abs, bool neg);
[[nodiscard]] U32 FPCosNotReduced(const U32& value); [[nodiscard]] F32 FPCosNotReduced(const F32& value);
[[nodiscard]] U32 FPExp2NotReduced(const U32& value); [[nodiscard]] F32 FPExp2NotReduced(const F32& value);
[[nodiscard]] U32 FPLog2(const U32& value); [[nodiscard]] F32 FPLog2(const F32& value);
[[nodiscard]] U32U64 FPRecip(const U32U64& value); [[nodiscard]] F32F64 FPRecip(const F32F64& value);
[[nodiscard]] U32U64 FPRecipSqrt(const U32U64& value); [[nodiscard]] F32F64 FPRecipSqrt(const F32F64& value);
[[nodiscard]] U32 FPSinNotReduced(const U32& value); [[nodiscard]] F32 FPSinNotReduced(const F32& value);
[[nodiscard]] U32 FPSqrt(const U32& value); [[nodiscard]] F32 FPSqrt(const F32& value);
[[nodiscard]] U16U32U64 FPSaturate(const U16U32U64& value); [[nodiscard]] F16F32F64 FPSaturate(const F16F32F64& value);
[[nodiscard]] U16U32U64 FPRoundEven(const U16U32U64& value); [[nodiscard]] F16F32F64 FPRoundEven(const F16F32F64& value);
[[nodiscard]] U16U32U64 FPFloor(const U16U32U64& value); [[nodiscard]] F16F32F64 FPFloor(const F16F32F64& value);
[[nodiscard]] U16U32U64 FPCeil(const U16U32U64& value); [[nodiscard]] F16F32F64 FPCeil(const F16F32F64& value);
[[nodiscard]] U16U32U64 FPTrunc(const U16U32U64& value); [[nodiscard]] F16F32F64 FPTrunc(const F16F32F64& value);
[[nodiscard]] U32U64 IAdd(const U32U64& a, const U32U64& b); [[nodiscard]] U32U64 IAdd(const U32U64& a, const U32U64& b);
[[nodiscard]] U32U64 ISub(const U32U64& a, const U32U64& b); [[nodiscard]] U32U64 ISub(const U32U64& a, const U32U64& b);
@ -154,9 +157,9 @@ public:
[[nodiscard]] U1 LogicalXor(const U1& a, const U1& b); [[nodiscard]] U1 LogicalXor(const U1& a, const U1& b);
[[nodiscard]] U1 LogicalNot(const U1& value); [[nodiscard]] U1 LogicalNot(const U1& value);
[[nodiscard]] U32U64 ConvertFToS(size_t bitsize, const U16U32U64& value); [[nodiscard]] U32U64 ConvertFToS(size_t bitsize, const F16F32F64& value);
[[nodiscard]] U32U64 ConvertFToU(size_t bitsize, const U16U32U64& value); [[nodiscard]] U32U64 ConvertFToU(size_t bitsize, const F16F32F64& value);
[[nodiscard]] U32U64 ConvertFToI(size_t bitsize, bool is_signed, const U16U32U64& value); [[nodiscard]] U32U64 ConvertFToI(size_t bitsize, bool is_signed, const F16F32F64& value);
[[nodiscard]] U32U64 ConvertU(size_t result_bitsize, const U32U64& value); [[nodiscard]] U32U64 ConvertU(size_t result_bitsize, const U32U64& value);

View file

@ -52,15 +52,15 @@ OPCODE(LoadGlobalS8, U32, U64,
OPCODE(LoadGlobalU16, U32, U64, ) OPCODE(LoadGlobalU16, U32, U64, )
OPCODE(LoadGlobalS16, U32, U64, ) OPCODE(LoadGlobalS16, U32, U64, )
OPCODE(LoadGlobal32, U32, U64, ) OPCODE(LoadGlobal32, U32, U64, )
OPCODE(LoadGlobal64, Opaque, U64, ) OPCODE(LoadGlobal64, U32x2, U64, )
OPCODE(LoadGlobal128, Opaque, U64, ) OPCODE(LoadGlobal128, U32x4, U64, )
OPCODE(WriteGlobalU8, Void, U64, U32, ) OPCODE(WriteGlobalU8, Void, U64, U32, )
OPCODE(WriteGlobalS8, Void, U64, U32, ) OPCODE(WriteGlobalS8, Void, U64, U32, )
OPCODE(WriteGlobalU16, Void, U64, U32, ) OPCODE(WriteGlobalU16, Void, U64, U32, )
OPCODE(WriteGlobalS16, Void, U64, U32, ) OPCODE(WriteGlobalS16, Void, U64, U32, )
OPCODE(WriteGlobal32, Void, U64, U32, ) OPCODE(WriteGlobal32, Void, U64, U32, )
OPCODE(WriteGlobal64, Void, U64, Opaque, ) OPCODE(WriteGlobal64, Void, U64, U32x2, )
OPCODE(WriteGlobal128, Void, U64, Opaque, ) OPCODE(WriteGlobal128, Void, U64, U32x4, )
// Storage buffer operations // Storage buffer operations
OPCODE(LoadStorageU8, U32, U32, U32, ) OPCODE(LoadStorageU8, U32, U32, U32, )
@ -68,21 +68,41 @@ OPCODE(LoadStorageS8, U32, U32,
OPCODE(LoadStorageU16, U32, U32, U32, ) OPCODE(LoadStorageU16, U32, U32, U32, )
OPCODE(LoadStorageS16, U32, U32, U32, ) OPCODE(LoadStorageS16, U32, U32, U32, )
OPCODE(LoadStorage32, U32, U32, U32, ) OPCODE(LoadStorage32, U32, U32, U32, )
OPCODE(LoadStorage64, Opaque, U32, U32, ) OPCODE(LoadStorage64, U32x2, U32, U32, )
OPCODE(LoadStorage128, Opaque, U32, U32, ) OPCODE(LoadStorage128, U32x4, U32, U32, )
OPCODE(WriteStorageU8, Void, U32, U32, U32, ) OPCODE(WriteStorageU8, Void, U32, U32, U32, )
OPCODE(WriteStorageS8, Void, U32, U32, U32, ) OPCODE(WriteStorageS8, Void, U32, U32, U32, )
OPCODE(WriteStorageU16, Void, U32, U32, U32, ) OPCODE(WriteStorageU16, Void, U32, U32, U32, )
OPCODE(WriteStorageS16, Void, U32, U32, U32, ) OPCODE(WriteStorageS16, Void, U32, U32, U32, )
OPCODE(WriteStorage32, Void, U32, U32, U32, ) OPCODE(WriteStorage32, Void, U32, U32, U32, )
OPCODE(WriteStorage64, Void, U32, U32, Opaque, ) OPCODE(WriteStorage64, Void, U32, U32, U32x2, )
OPCODE(WriteStorage128, Void, U32, U32, Opaque, ) OPCODE(WriteStorage128, Void, U32, U32, U32x4, )
// Vector utility // Vector utility
OPCODE(CompositeConstruct2, Opaque, Opaque, Opaque, ) OPCODE(CompositeConstructU32x2, U32x2, U32, U32, )
OPCODE(CompositeConstruct3, Opaque, Opaque, Opaque, Opaque, ) OPCODE(CompositeConstructU32x3, U32x3, U32, U32, U32, )
OPCODE(CompositeConstruct4, Opaque, Opaque, Opaque, Opaque, Opaque, ) OPCODE(CompositeConstructU32x4, U32x4, U32, U32, U32, U32, )
OPCODE(CompositeExtract, Opaque, Opaque, U32, ) OPCODE(CompositeExtractU32x2, U32, U32x2, U32, )
OPCODE(CompositeExtractU32x3, U32, U32x3, U32, )
OPCODE(CompositeExtractU32x4, U32, U32x4, U32, )
OPCODE(CompositeConstructF16x2, F16x2, F16, F16, )
OPCODE(CompositeConstructF16x3, F16x3, F16, F16, F16, )
OPCODE(CompositeConstructF16x4, F16x4, F16, F16, F16, F16, )
OPCODE(CompositeExtractF16x2, F16, F16x2, U32, )
OPCODE(CompositeExtractF16x3, F16, F16x3, U32, )
OPCODE(CompositeExtractF16x4, F16, F16x4, U32, )
OPCODE(CompositeConstructF32x2, F32x2, F32, F32, )
OPCODE(CompositeConstructF32x3, F32x3, F32, F32, F32, )
OPCODE(CompositeConstructF32x4, F32x4, F32, F32, F32, F32, )
OPCODE(CompositeExtractF32x2, F32, F32x2, U32, )
OPCODE(CompositeExtractF32x3, F32, F32x3, U32, )
OPCODE(CompositeExtractF32x4, F32, F32x4, U32, )
OPCODE(CompositeConstructF64x2, F64x2, F64, F64, )
OPCODE(CompositeConstructF64x3, F64x3, F64, F64, F64, )
OPCODE(CompositeConstructF64x4, F64x4, F64, F64, F64, F64, )
OPCODE(CompositeExtractF64x2, F64, F64x2, U32, )
OPCODE(CompositeExtractF64x3, F64, F64x3, U32, )
OPCODE(CompositeExtractF64x4, F64, F64x4, U32, )
// Select operations // Select operations
OPCODE(Select8, U8, U1, U8, U8, ) OPCODE(Select8, U8, U1, U8, U8, )
@ -91,12 +111,18 @@ OPCODE(Select32, U32, U1,
OPCODE(Select64, U64, U1, U64, U64, ) OPCODE(Select64, U64, U1, U64, U64, )
// Bitwise conversions // Bitwise conversions
OPCODE(PackUint2x32, U64, Opaque, ) OPCODE(BitCastU16F16, U16, F16, )
OPCODE(UnpackUint2x32, Opaque, U64, ) OPCODE(BitCastU32F32, U32, F32, )
OPCODE(PackFloat2x16, U32, Opaque, ) OPCODE(BitCastU64F64, U64, F64, )
OPCODE(UnpackFloat2x16, Opaque, U32, ) OPCODE(BitCastF16U16, F16, U16, )
OPCODE(PackDouble2x32, U64, Opaque, ) OPCODE(BitCastF32U32, F32, U32, )
OPCODE(UnpackDouble2x32, Opaque, U64, ) OPCODE(BitCastF64U64, F64, U64, )
OPCODE(PackUint2x32, U64, U32x2, )
OPCODE(UnpackUint2x32, U32x2, U64, )
OPCODE(PackFloat2x16, U32, F16x2, )
OPCODE(UnpackFloat2x16, F16x2, U32, )
OPCODE(PackDouble2x32, U64, U32x2, )
OPCODE(UnpackDouble2x32, U32x2, U64, )
// Pseudo-operation, handled specially at final emit // Pseudo-operation, handled specially at final emit
OPCODE(GetZeroFromOp, U1, Opaque, ) OPCODE(GetZeroFromOp, U1, Opaque, )
@ -105,52 +131,52 @@ OPCODE(GetCarryFromOp, U1, Opaq
OPCODE(GetOverflowFromOp, U1, Opaque, ) OPCODE(GetOverflowFromOp, U1, Opaque, )
// Floating-point operations // Floating-point operations
OPCODE(FPAbs16, U16, U16, ) OPCODE(FPAbs16, F16, F16, )
OPCODE(FPAbs32, U32, U32, ) OPCODE(FPAbs32, F32, F32, )
OPCODE(FPAbs64, U64, U64, ) OPCODE(FPAbs64, F64, F64, )
OPCODE(FPAdd16, U16, U16, U16, ) OPCODE(FPAdd16, F16, F16, F16, )
OPCODE(FPAdd32, U32, U32, U32, ) OPCODE(FPAdd32, F32, F32, F32, )
OPCODE(FPAdd64, U64, U64, U64, ) OPCODE(FPAdd64, F64, F64, F64, )
OPCODE(FPFma16, U16, U16, U16, U16, ) OPCODE(FPFma16, F16, F16, F16, F16, )
OPCODE(FPFma32, U32, U32, U32, U32, ) OPCODE(FPFma32, F32, F32, F32, F32, )
OPCODE(FPFma64, U64, U64, U64, U64, ) OPCODE(FPFma64, F64, F64, F64, F64, )
OPCODE(FPMax32, U32, U32, U32, ) OPCODE(FPMax32, F32, F32, F32, )
OPCODE(FPMax64, U64, U64, U64, ) OPCODE(FPMax64, F64, F64, F64, )
OPCODE(FPMin32, U32, U32, U32, ) OPCODE(FPMin32, F32, F32, F32, )
OPCODE(FPMin64, U64, U64, U64, ) OPCODE(FPMin64, F64, F64, F64, )
OPCODE(FPMul16, U16, U16, U16, ) OPCODE(FPMul16, F16, F16, F16, )
OPCODE(FPMul32, U32, U32, U32, ) OPCODE(FPMul32, F32, F32, F32, )
OPCODE(FPMul64, U64, U64, U64, ) OPCODE(FPMul64, F64, F64, F64, )
OPCODE(FPNeg16, U16, U16, ) OPCODE(FPNeg16, F16, F16, )
OPCODE(FPNeg32, U32, U32, ) OPCODE(FPNeg32, F32, F32, )
OPCODE(FPNeg64, U64, U64, ) OPCODE(FPNeg64, F64, F64, )
OPCODE(FPRecip32, U32, U32, ) OPCODE(FPRecip32, F32, F32, )
OPCODE(FPRecip64, U64, U64, ) OPCODE(FPRecip64, F64, F64, )
OPCODE(FPRecipSqrt32, U32, U32, ) OPCODE(FPRecipSqrt32, F32, F32, )
OPCODE(FPRecipSqrt64, U64, U64, ) OPCODE(FPRecipSqrt64, F64, F64, )
OPCODE(FPSqrt, U32, U32, ) OPCODE(FPSqrt, F32, F32, )
OPCODE(FPSin, U32, U32, ) OPCODE(FPSin, F32, F32, )
OPCODE(FPSinNotReduced, U32, U32, ) OPCODE(FPSinNotReduced, F32, F32, )
OPCODE(FPExp2, U32, U32, ) OPCODE(FPExp2, F32, F32, )
OPCODE(FPExp2NotReduced, U32, U32, ) OPCODE(FPExp2NotReduced, F32, F32, )
OPCODE(FPCos, U32, U32, ) OPCODE(FPCos, F32, F32, )
OPCODE(FPCosNotReduced, U32, U32, ) OPCODE(FPCosNotReduced, F32, F32, )
OPCODE(FPLog2, U32, U32, ) OPCODE(FPLog2, F32, F32, )
OPCODE(FPSaturate16, U16, U16, ) OPCODE(FPSaturate16, F16, F16, )
OPCODE(FPSaturate32, U32, U32, ) OPCODE(FPSaturate32, F32, F32, )
OPCODE(FPSaturate64, U64, U64, ) OPCODE(FPSaturate64, F64, F64, )
OPCODE(FPRoundEven16, U16, U16, ) OPCODE(FPRoundEven16, F16, F16, )
OPCODE(FPRoundEven32, U32, U32, ) OPCODE(FPRoundEven32, F32, F32, )
OPCODE(FPRoundEven64, U64, U64, ) OPCODE(FPRoundEven64, F64, F64, )
OPCODE(FPFloor16, U16, U16, ) OPCODE(FPFloor16, F16, F16, )
OPCODE(FPFloor32, U32, U32, ) OPCODE(FPFloor32, F32, F32, )
OPCODE(FPFloor64, U64, U64, ) OPCODE(FPFloor64, F64, F64, )
OPCODE(FPCeil16, U16, U16, ) OPCODE(FPCeil16, F16, F16, )
OPCODE(FPCeil32, U32, U32, ) OPCODE(FPCeil32, F32, F32, )
OPCODE(FPCeil64, U64, U64, ) OPCODE(FPCeil64, F64, F64, )
OPCODE(FPTrunc16, U16, U16, ) OPCODE(FPTrunc16, F16, F16, )
OPCODE(FPTrunc32, U32, U32, ) OPCODE(FPTrunc32, F32, F32, )
OPCODE(FPTrunc64, U64, U64, ) OPCODE(FPTrunc64, F64, F64, )
// Integer operations // Integer operations
OPCODE(IAdd32, U32, U32, U32, ) OPCODE(IAdd32, U32, U32, U32, )
@ -188,24 +214,24 @@ OPCODE(LogicalXor, U1, U1,
OPCODE(LogicalNot, U1, U1, ) OPCODE(LogicalNot, U1, U1, )
// Conversion operations // Conversion operations
OPCODE(ConvertS16F16, U32, U16, ) OPCODE(ConvertS16F16, U32, F16, )
OPCODE(ConvertS16F32, U32, U32, ) OPCODE(ConvertS16F32, U32, F32, )
OPCODE(ConvertS16F64, U32, U64, ) OPCODE(ConvertS16F64, U32, F64, )
OPCODE(ConvertS32F16, U32, U16, ) OPCODE(ConvertS32F16, U32, F16, )
OPCODE(ConvertS32F32, U32, U32, ) OPCODE(ConvertS32F32, U32, F32, )
OPCODE(ConvertS32F64, U32, U64, ) OPCODE(ConvertS32F64, U32, F64, )
OPCODE(ConvertS64F16, U64, U16, ) OPCODE(ConvertS64F16, U64, F16, )
OPCODE(ConvertS64F32, U64, U32, ) OPCODE(ConvertS64F32, U64, F32, )
OPCODE(ConvertS64F64, U64, U64, ) OPCODE(ConvertS64F64, U64, F64, )
OPCODE(ConvertU16F16, U32, U16, ) OPCODE(ConvertU16F16, U32, F16, )
OPCODE(ConvertU16F32, U32, U32, ) OPCODE(ConvertU16F32, U32, F32, )
OPCODE(ConvertU16F64, U32, U64, ) OPCODE(ConvertU16F64, U32, F64, )
OPCODE(ConvertU32F16, U32, U16, ) OPCODE(ConvertU32F16, U32, F16, )
OPCODE(ConvertU32F32, U32, U32, ) OPCODE(ConvertU32F32, U32, F32, )
OPCODE(ConvertU32F64, U32, U64, ) OPCODE(ConvertU32F64, U32, F64, )
OPCODE(ConvertU64F16, U64, U16, ) OPCODE(ConvertU64F16, U64, F16, )
OPCODE(ConvertU64F32, U64, U32, ) OPCODE(ConvertU64F32, U64, F32, )
OPCODE(ConvertU64F64, U64, U64, ) OPCODE(ConvertU64F64, U64, F64, )
OPCODE(ConvertU64U32, U64, U32, ) OPCODE(ConvertU64U32, U64, U32, )
OPCODE(ConvertU32U64, U32, U64, ) OPCODE(ConvertU32U64, U32, U64, )

View file

@ -11,7 +11,9 @@ namespace Shader::IR {
std::string NameOf(Type type) { std::string NameOf(Type type) {
static constexpr std::array names{ static constexpr std::array names{
"Opaque", "Label", "Reg", "Pred", "Attribute", "U1", "U8", "U16", "U32", "U64", "Opaque", "Label", "Reg", "Pred", "Attribute", "U1", "U8", "U16", "U32",
"U64", "F16", "F32", "F64", "U32x2", "U32x3", "U32x4", "F16x2", "F16x3",
"F16x4", "F32x2", "F32x3", "F32x4", "F64x2", "F64x3", "F64x4",
}; };
const size_t bits{static_cast<size_t>(type)}; const size_t bits{static_cast<size_t>(type)};
if (bits == 0) { if (bits == 0) {

View file

@ -25,6 +25,21 @@ enum class Type {
U16 = 1 << 7, U16 = 1 << 7,
U32 = 1 << 8, U32 = 1 << 8,
U64 = 1 << 9, U64 = 1 << 9,
F16 = 1 << 10,
F32 = 1 << 11,
F64 = 1 << 12,
U32x2 = 1 << 13,
U32x3 = 1 << 14,
U32x4 = 1 << 15,
F16x2 = 1 << 16,
F16x3 = 1 << 17,
F16x4 = 1 << 18,
F32x2 = 1 << 19,
F32x3 = 1 << 20,
F32x4 = 1 << 21,
F64x2 = 1 << 22,
F64x3 = 1 << 23,
F64x4 = 1 << 24,
}; };
DECLARE_ENUM_FLAG_OPERATORS(Type) DECLARE_ENUM_FLAG_OPERATORS(Type)

View file

@ -26,8 +26,12 @@ Value::Value(u16 value) noexcept : type{Type::U16}, imm_u16{value} {}
Value::Value(u32 value) noexcept : type{Type::U32}, imm_u32{value} {} Value::Value(u32 value) noexcept : type{Type::U32}, imm_u32{value} {}
/// Constructs an immediate tagged as Type::F32, storing the float in imm_f32
Value::Value(f32 value) noexcept : type{Type::F32}, imm_f32{value} {}
Value::Value(u64 value) noexcept : type{Type::U64}, imm_u64{value} {} Value::Value(u64 value) noexcept : type{Type::U64}, imm_u64{value} {}
/// Constructs an immediate tagged as Type::F64, storing the double in imm_f64
Value::Value(f64 value) noexcept : type{Type::F64}, imm_f64{value} {}
bool Value::IsIdentity() const noexcept { bool Value::IsIdentity() const noexcept {
return type == Type::Opaque && inst->Opcode() == Opcode::Identity; return type == Type::Opaque && inst->Opcode() == Opcode::Identity;
} }
@ -122,6 +126,14 @@ u32 Value::U32() const {
return imm_u32; return imm_u32;
} }
/// Returns the 32-bit float immediate held by this value.
/// When the value is an identity instruction, the request is forwarded to that
/// instruction's first argument instead of reading the local storage.
f32 Value::F32() const {
    if (!IsIdentity()) {
        // Access is validated against Type::F32 before the union member is read
        ValidateAccess(Type::F32);
        return imm_f32;
    }
    return inst->Arg(0).F32();
}
u64 Value::U64() const { u64 Value::U64() const {
if (IsIdentity()) { if (IsIdentity()) {
return inst->Arg(0).U64(); return inst->Arg(0).U64();
@ -152,11 +164,27 @@ bool Value::operator==(const Value& other) const {
case Type::U8: case Type::U8:
return imm_u8 == other.imm_u8; return imm_u8 == other.imm_u8;
case Type::U16: case Type::U16:
case Type::F16:
return imm_u16 == other.imm_u16; return imm_u16 == other.imm_u16;
case Type::U32: case Type::U32:
case Type::F32:
return imm_u32 == other.imm_u32; return imm_u32 == other.imm_u32;
case Type::U64: case Type::U64:
case Type::F64:
return imm_u64 == other.imm_u64; return imm_u64 == other.imm_u64;
case Type::U32x2:
case Type::U32x3:
case Type::U32x4:
case Type::F16x2:
case Type::F16x3:
case Type::F16x4:
case Type::F32x2:
case Type::F32x3:
case Type::F32x4:
case Type::F64x2:
case Type::F64x3:
case Type::F64x4:
break;
} }
throw LogicError("Invalid type {}", type); throw LogicError("Invalid type {}", type);
} }

View file

@ -28,7 +28,9 @@ public:
explicit Value(u8 value) noexcept; explicit Value(u8 value) noexcept;
explicit Value(u16 value) noexcept; explicit Value(u16 value) noexcept;
explicit Value(u32 value) noexcept; explicit Value(u32 value) noexcept;
explicit Value(f32 value) noexcept;
explicit Value(u64 value) noexcept; explicit Value(u64 value) noexcept;
explicit Value(f64 value) noexcept;
[[nodiscard]] bool IsIdentity() const noexcept; [[nodiscard]] bool IsIdentity() const noexcept;
[[nodiscard]] bool IsEmpty() const noexcept; [[nodiscard]] bool IsEmpty() const noexcept;
@ -46,6 +48,7 @@ public:
[[nodiscard]] u8 U8() const; [[nodiscard]] u8 U8() const;
[[nodiscard]] u16 U16() const; [[nodiscard]] u16 U16() const;
[[nodiscard]] u32 U32() const; [[nodiscard]] u32 U32() const;
[[nodiscard]] f32 F32() const;
[[nodiscard]] u64 U64() const; [[nodiscard]] u64 U64() const;
[[nodiscard]] bool operator==(const Value& other) const; [[nodiscard]] bool operator==(const Value& other) const;
@ -65,7 +68,9 @@ private:
u8 imm_u8; u8 imm_u8;
u16 imm_u16; u16 imm_u16;
u32 imm_u32; u32 imm_u32;
f32 imm_f32;
u64 imm_u64; u64 imm_u64;
f64 imm_f64;
}; };
}; };
@ -93,8 +98,13 @@ using U8 = TypedValue<Type::U8>;
using U16 = TypedValue<Type::U16>; using U16 = TypedValue<Type::U16>;
using U32 = TypedValue<Type::U32>; using U32 = TypedValue<Type::U32>;
using U64 = TypedValue<Type::U64>; using U64 = TypedValue<Type::U64>;
using F16 = TypedValue<Type::F16>;
using F32 = TypedValue<Type::F32>;
using F64 = TypedValue<Type::F64>;
using U32U64 = TypedValue<Type::U32 | Type::U64>; using U32U64 = TypedValue<Type::U32 | Type::U64>;
using F32F64 = TypedValue<Type::F32 | Type::F64>;
using U16U32U64 = TypedValue<Type::U16 | Type::U32 | Type::U64>; using U16U32U64 = TypedValue<Type::U16 | Type::U32 | Type::U64>;
using F16F32F64 = TypedValue<Type::F16 | Type::F32 | Type::F64>;
using UAny = TypedValue<Type::U8 | Type::U16 | Type::U32 | Type::U64>; using UAny = TypedValue<Type::U8 | Type::U16 | Type::U32 | Type::U64>;
} // namespace Shader::IR } // namespace Shader::IR

View file

@ -11,7 +11,7 @@ namespace Shader::Maxwell {
namespace { namespace {
void FADD(TranslatorVisitor& v, u64 insn, bool sat, bool cc, bool ftz, FpRounding fp_rounding, void FADD(TranslatorVisitor& v, u64 insn, bool sat, bool cc, bool ftz, FpRounding fp_rounding,
const IR::U32& src_b, bool abs_a, bool neg_a, bool abs_b, bool neg_b) { const IR::F32& src_b, bool abs_a, bool neg_a, bool abs_b, bool neg_b) {
union { union {
u64 raw; u64 raw;
BitField<0, 8, IR::Reg> dest_reg; BitField<0, 8, IR::Reg> dest_reg;
@ -24,17 +24,17 @@ void FADD(TranslatorVisitor& v, u64 insn, bool sat, bool cc, bool ftz, FpRoundin
if (cc) { if (cc) {
throw NotImplementedException("FADD CC"); throw NotImplementedException("FADD CC");
} }
const IR::U32 op_a{v.ir.FPAbsNeg(v.X(fadd.src_a), abs_a, neg_a)}; const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fadd.src_a), abs_a, neg_a)};
const IR::U32 op_b{v.ir.FPAbsNeg(src_b, abs_b, neg_b)}; const IR::F32 op_b{v.ir.FPAbsNeg(src_b, abs_b, neg_b)};
IR::FpControl control{ IR::FpControl control{
.no_contraction{true}, .no_contraction{true},
.rounding{CastFpRounding(fp_rounding)}, .rounding{CastFpRounding(fp_rounding)},
.fmz_mode{ftz ? IR::FmzMode::FTZ : IR::FmzMode::None}, .fmz_mode{ftz ? IR::FmzMode::FTZ : IR::FmzMode::None},
}; };
v.X(fadd.dest_reg, v.ir.FPAdd(op_a, op_b, control)); v.F(fadd.dest_reg, v.ir.FPAdd(op_a, op_b, control));
} }
void FADD(TranslatorVisitor& v, u64 insn, const IR::U32& src_b) { void FADD(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) {
union { union {
u64 raw; u64 raw;
BitField<39, 2, FpRounding> fp_rounding; BitField<39, 2, FpRounding> fp_rounding;
@ -53,7 +53,7 @@ void FADD(TranslatorVisitor& v, u64 insn, const IR::U32& src_b) {
} // Anonymous namespace } // Anonymous namespace
void TranslatorVisitor::FADD_reg(u64 insn) { void TranslatorVisitor::FADD_reg(u64 insn) {
FADD(*this, insn, GetReg20(insn)); FADD(*this, insn, GetReg20F(insn));
} }
void TranslatorVisitor::FADD_cbuf(u64) { void TranslatorVisitor::FADD_cbuf(u64) {

View file

@ -55,21 +55,21 @@ size_t BitSize(DestFormat dest_format) {
} }
} }
void TranslateF2I(TranslatorVisitor& v, u64 insn, const IR::U16U32U64& op_a) { void TranslateF2I(TranslatorVisitor& v, u64 insn, const IR::F16F32F64& src_a) {
// F2I is used to convert from a floating point value to an integer // F2I is used to convert from a floating point value to an integer
const F2I f2i{insn}; const F2I f2i{insn};
const IR::U16U32U64 float_value{v.ir.FPAbsNeg(op_a, f2i.abs != 0, f2i.neg != 0)}; const IR::F16F32F64 op_a{v.ir.FPAbsNeg(src_a, f2i.abs != 0, f2i.neg != 0)};
const IR::U16U32U64 rounded_value{[&] { const IR::F16F32F64 rounded_value{[&] {
switch (f2i.rounding) { switch (f2i.rounding) {
case Rounding::Round: case Rounding::Round:
return v.ir.FPRoundEven(float_value); return v.ir.FPRoundEven(op_a);
case Rounding::Floor: case Rounding::Floor:
return v.ir.FPFloor(float_value); return v.ir.FPFloor(op_a);
case Rounding::Ceil: case Rounding::Ceil:
return v.ir.FPCeil(float_value); return v.ir.FPCeil(op_a);
case Rounding::Trunc: case Rounding::Trunc:
return v.ir.FPTrunc(float_value); return v.ir.FPTrunc(op_a);
default: default:
throw NotImplementedException("Invalid F2I rounding {}", f2i.rounding.Value()); throw NotImplementedException("Invalid F2I rounding {}", f2i.rounding.Value());
} }
@ -105,12 +105,12 @@ void TranslatorVisitor::F2I_reg(u64 insn) {
BitField<20, 8, IR::Reg> src_reg; BitField<20, 8, IR::Reg> src_reg;
} const f2i{insn}; } const f2i{insn};
const IR::U16U32U64 op_a{[&]() -> IR::U16U32U64 { const IR::F16F32F64 op_a{[&]() -> IR::F16F32F64 {
switch (f2i.base.src_format) { switch (f2i.base.src_format) {
case SrcFormat::F16: case SrcFormat::F16:
return ir.CompositeExtract(ir.UnpackFloat2x16(X(f2i.src_reg)), f2i.base.half); return IR::F16{ir.CompositeExtract(ir.UnpackFloat2x16(X(f2i.src_reg)), f2i.base.half)};
case SrcFormat::F32: case SrcFormat::F32:
return X(f2i.src_reg); return F(f2i.src_reg);
case SrcFormat::F64: case SrcFormat::F64:
return ir.PackDouble2x32(ir.CompositeConstruct(X(f2i.src_reg), X(f2i.src_reg + 1))); return ir.PackDouble2x32(ir.CompositeConstruct(X(f2i.src_reg), X(f2i.src_reg + 1)));
default: default:

View file

@ -9,7 +9,7 @@
namespace Shader::Maxwell { namespace Shader::Maxwell {
namespace { namespace {
void FFMA(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, const IR::U32& src_c, bool neg_a, void FFMA(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, const IR::F32& src_c, bool neg_a,
bool neg_b, bool neg_c, bool sat, bool cc, FmzMode fmz_mode, FpRounding fp_rounding) { bool neg_b, bool neg_c, bool sat, bool cc, FmzMode fmz_mode, FpRounding fp_rounding) {
union { union {
u64 raw; u64 raw;
@ -23,18 +23,18 @@ void FFMA(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, const IR::U32& s
if (cc) { if (cc) {
throw NotImplementedException("FFMA CC"); throw NotImplementedException("FFMA CC");
} }
const IR::U32 op_a{v.ir.FPAbsNeg(v.X(ffma.src_a), false, neg_a)}; const IR::F32 op_a{v.ir.FPAbsNeg(v.F(ffma.src_a), false, neg_a)};
const IR::U32 op_b{v.ir.FPAbsNeg(src_b, false, neg_b)}; const IR::F32 op_b{v.ir.FPAbsNeg(src_b, false, neg_b)};
const IR::U32 op_c{v.ir.FPAbsNeg(src_c, false, neg_c)}; const IR::F32 op_c{v.ir.FPAbsNeg(src_c, false, neg_c)};
const IR::FpControl fp_control{ const IR::FpControl fp_control{
.no_contraction{true}, .no_contraction{true},
.rounding{CastFpRounding(fp_rounding)}, .rounding{CastFpRounding(fp_rounding)},
.fmz_mode{CastFmzMode(fmz_mode)}, .fmz_mode{CastFmzMode(fmz_mode)},
}; };
v.X(ffma.dest_reg, v.ir.FPFma(op_a, op_b, op_c, fp_control)); v.F(ffma.dest_reg, v.ir.FPFma(op_a, op_b, op_c, fp_control));
} }
void FFMA(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, const IR::U32& src_c) { void FFMA(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, const IR::F32& src_c) {
union { union {
u64 raw; u64 raw;
BitField<47, 1, u64> cc; BitField<47, 1, u64> cc;
@ -51,7 +51,7 @@ void FFMA(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, const IR::U32& s
} // Anonymous namespace } // Anonymous namespace
void TranslatorVisitor::FFMA_reg(u64 insn) { void TranslatorVisitor::FFMA_reg(u64 insn) {
FFMA(*this, insn, GetReg20(insn), GetReg39(insn)); FFMA(*this, insn, GetReg20F(insn), GetReg39F(insn));
} }
void TranslatorVisitor::FFMA_rc(u64) { void TranslatorVisitor::FFMA_rc(u64) {
@ -59,7 +59,7 @@ void TranslatorVisitor::FFMA_rc(u64) {
} }
void TranslatorVisitor::FFMA_cr(u64 insn) { void TranslatorVisitor::FFMA_cr(u64 insn) {
FFMA(*this, insn, GetCbuf(insn), GetReg39(insn)); FFMA(*this, insn, GetCbufF(insn), GetReg39F(insn));
} }
void TranslatorVisitor::FFMA_imm(u64) { void TranslatorVisitor::FFMA_imm(u64) {

View file

@ -35,8 +35,8 @@ void TranslatorVisitor::MUFU(u64 insn) {
BitField<50, 1, u64> sat; BitField<50, 1, u64> sat;
} const mufu{insn}; } const mufu{insn};
const IR::U32 op_a{ir.FPAbsNeg(X(mufu.src_reg), mufu.abs != 0, mufu.neg != 0)}; const IR::F32 op_a{ir.FPAbsNeg(F(mufu.src_reg), mufu.abs != 0, mufu.neg != 0)};
IR::U32 value{[&]() -> IR::U32 { IR::F32 value{[&]() -> IR::F32 {
switch (mufu.operation) { switch (mufu.operation) {
case Operation::Cos: case Operation::Cos:
return ir.FPCosNotReduced(op_a); return ir.FPCosNotReduced(op_a);
@ -65,7 +65,7 @@ void TranslatorVisitor::MUFU(u64 insn) {
value = ir.FPSaturate(value); value = ir.FPSaturate(value);
} }
X(mufu.dest_reg, value); F(mufu.dest_reg, value);
} }
} // namespace Shader::Maxwell } // namespace Shader::Maxwell

View file

@ -4,6 +4,7 @@
#include "common/bit_field.h" #include "common/bit_field.h"
#include "common/common_types.h" #include "common/common_types.h"
#include "shader_recompiler/frontend/ir/ir_emitter.h"
#include "shader_recompiler/frontend/ir/modifiers.h" #include "shader_recompiler/frontend/ir/modifiers.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h" #include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
@ -43,7 +44,7 @@ float ScaleFactor(Scale scale) {
throw NotImplementedException("Invalid FMUL scale {}", scale); throw NotImplementedException("Invalid FMUL scale {}", scale);
} }
void FMUL(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, FmzMode fmz_mode, void FMUL(TranslatorVisitor& v, u64 insn, const IR::F32& src_b, FmzMode fmz_mode,
FpRounding fp_rounding, Scale scale, bool sat, bool cc, bool neg_b) { FpRounding fp_rounding, Scale scale, bool sat, bool cc, bool neg_b) {
union { union {
u64 raw; u64 raw;
@ -57,23 +58,23 @@ void FMUL(TranslatorVisitor& v, u64 insn, const IR::U32& src_b, FmzMode fmz_mode
if (sat) { if (sat) {
throw NotImplementedException("FMUL SAT"); throw NotImplementedException("FMUL SAT");
} }
IR::U32 op_a{v.X(fmul.src_a)}; IR::F32 op_a{v.F(fmul.src_a)};
if (scale != Scale::None) { if (scale != Scale::None) {
if (fmz_mode != FmzMode::FTZ || fp_rounding != FpRounding::RN) { if (fmz_mode != FmzMode::FTZ || fp_rounding != FpRounding::RN) {
throw NotImplementedException("FMUL scale with non-FMZ or non-RN modifiers"); throw NotImplementedException("FMUL scale with non-FMZ or non-RN modifiers");
} }
op_a = v.ir.FPMul(op_a, v.ir.Imm32(ScaleFactor(scale))); op_a = v.ir.FPMul(op_a, v.ir.Imm32(ScaleFactor(scale)));
} }
const IR::U32 op_b{v.ir.FPAbsNeg(src_b, false, neg_b)}; const IR::F32 op_b{v.ir.FPAbsNeg(src_b, false, neg_b)};
const IR::FpControl fp_control{ const IR::FpControl fp_control{
.no_contraction{true}, .no_contraction{true},
.rounding{CastFpRounding(fp_rounding)}, .rounding{CastFpRounding(fp_rounding)},
.fmz_mode{CastFmzMode(fmz_mode)}, .fmz_mode{CastFmzMode(fmz_mode)},
}; };
v.X(fmul.dest_reg, v.ir.FPMul(op_a, op_b, fp_control)); v.F(fmul.dest_reg, v.ir.FPMul(op_a, op_b, fp_control));
} }
void FMUL(TranslatorVisitor& v, u64 insn, const IR::U32& src_b) { void FMUL(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) {
union { union {
u64 raw; u64 raw;
BitField<39, 2, FpRounding> fp_rounding; BitField<39, 2, FpRounding> fp_rounding;
@ -90,7 +91,7 @@ void FMUL(TranslatorVisitor& v, u64 insn, const IR::U32& src_b) {
} // Anonymous namespace } // Anonymous namespace
void TranslatorVisitor::FMUL_reg(u64 insn) { void TranslatorVisitor::FMUL_reg(u64 insn) {
return FMUL(*this, insn, GetReg20(insn)); return FMUL(*this, insn, GetReg20F(insn));
} }
void TranslatorVisitor::FMUL_cbuf(u64) { void TranslatorVisitor::FMUL_cbuf(u64) {

View file

@ -12,10 +12,18 @@ IR::U32 TranslatorVisitor::X(IR::Reg reg) {
return ir.GetReg(reg); return ir.GetReg(reg);
} }
/// Reads register `reg` through X() and bit-casts the 32-bit result to an F32 value
IR::F32 TranslatorVisitor::F(IR::Reg reg) {
    const IR::U32 raw_bits{X(reg)};
    return ir.BitCast<IR::F32>(raw_bits);
}
void TranslatorVisitor::X(IR::Reg dest_reg, const IR::U32& value) { void TranslatorVisitor::X(IR::Reg dest_reg, const IR::U32& value) {
ir.SetReg(dest_reg, value); ir.SetReg(dest_reg, value);
} }
/// Bit-casts the F32 `value` to U32 and stores it in `dest_reg` through X()
void TranslatorVisitor::F(IR::Reg dest_reg, const IR::F32& value) {
    const IR::U32 raw_bits{ir.BitCast<IR::U32>(value)};
    X(dest_reg, raw_bits);
}
IR::U32 TranslatorVisitor::GetReg20(u64 insn) { IR::U32 TranslatorVisitor::GetReg20(u64 insn) {
union { union {
u64 raw; u64 raw;
@ -32,6 +40,14 @@ IR::U32 TranslatorVisitor::GetReg39(u64 insn) {
return X(reg.index); return X(reg.index);
} }
/// Float flavour of GetReg20(): fetches the same operand and bit-casts it to F32
IR::F32 TranslatorVisitor::GetReg20F(u64 insn) {
    const IR::U32 raw_bits{GetReg20(insn)};
    return ir.BitCast<IR::F32>(raw_bits);
}
/// Float flavour of GetReg39(): fetches the same operand and bit-casts it to F32
IR::F32 TranslatorVisitor::GetReg39F(u64 insn) {
    const IR::U32 raw_bits{GetReg39(insn)};
    return ir.BitCast<IR::F32>(raw_bits);
}
IR::U32 TranslatorVisitor::GetCbuf(u64 insn) { IR::U32 TranslatorVisitor::GetCbuf(u64 insn) {
union { union {
u64 raw; u64 raw;
@ -49,6 +65,10 @@ IR::U32 TranslatorVisitor::GetCbuf(u64 insn) {
return ir.GetCbuf(binding, byte_offset); return ir.GetCbuf(binding, byte_offset);
} }
/// Float flavour of GetCbuf(): fetches the same constant buffer operand and bit-casts it to F32
IR::F32 TranslatorVisitor::GetCbufF(u64 insn) {
    const IR::U32 raw_bits{GetCbuf(insn)};
    return ir.BitCast<IR::F32>(raw_bits);
}
IR::U32 TranslatorVisitor::GetImm20(u64 insn) { IR::U32 TranslatorVisitor::GetImm20(u64 insn) {
union { union {
u64 raw; u64 raw;

View file

@ -296,12 +296,18 @@ public:
void XMAD_imm(u64 insn); void XMAD_imm(u64 insn);
[[nodiscard]] IR::U32 X(IR::Reg reg); [[nodiscard]] IR::U32 X(IR::Reg reg);
[[nodiscard]] IR::F32 F(IR::Reg reg);
void X(IR::Reg dest_reg, const IR::U32& value); void X(IR::Reg dest_reg, const IR::U32& value);
void F(IR::Reg dest_reg, const IR::F32& value);
[[nodiscard]] IR::U32 GetReg20(u64 insn); [[nodiscard]] IR::U32 GetReg20(u64 insn);
[[nodiscard]] IR::U32 GetReg39(u64 insn); [[nodiscard]] IR::U32 GetReg39(u64 insn);
[[nodiscard]] IR::F32 GetReg20F(u64 insn);
[[nodiscard]] IR::F32 GetReg39F(u64 insn);
[[nodiscard]] IR::U32 GetCbuf(u64 insn); [[nodiscard]] IR::U32 GetCbuf(u64 insn);
[[nodiscard]] IR::F32 GetCbufF(u64 insn);
[[nodiscard]] IR::U32 GetImm20(u64 insn); [[nodiscard]] IR::U32 GetImm20(u64 insn);

View file

@ -5,22 +5,23 @@
#include "common/bit_field.h" #include "common/bit_field.h"
#include "common/common_types.h" #include "common/common_types.h"
#include "shader_recompiler/exception.h" #include "shader_recompiler/exception.h"
#include "shader_recompiler/frontend/ir/ir_emitter.h"
#include "shader_recompiler/frontend/maxwell/opcode.h" #include "shader_recompiler/frontend/maxwell/opcode.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell { namespace Shader::Maxwell {
namespace { namespace {
enum class InterpolationMode : u64 { enum class InterpolationMode : u64 {
Pass = 0, Pass,
Multiply = 1, Multiply,
Constant = 2, Constant,
Sc = 3, Sc,
}; };
enum class SampleMode : u64 { enum class SampleMode : u64 {
Default = 0, Default,
Centroid = 1, Centroid,
Offset = 2, Offset,
}; };
} // Anonymous namespace } // Anonymous namespace
@ -54,12 +55,12 @@ void TranslatorVisitor::IPA(u64 insn) {
} }
const IR::Attribute attribute{ipa.attribute}; const IR::Attribute attribute{ipa.attribute};
IR::U32 value{ir.GetAttribute(attribute)}; IR::F32 value{ir.GetAttribute(attribute)};
if (IR::IsGeneric(attribute)) { if (IR::IsGeneric(attribute)) {
// const bool is_perspective{UnimplementedReadHeader(GenericAttributeIndex(attribute))}; // const bool is_perspective{UnimplementedReadHeader(GenericAttributeIndex(attribute))};
const bool is_perspective{false}; const bool is_perspective{false};
if (is_perspective) { if (is_perspective) {
const IR::U32 rcp_position_w{ir.FPRecip(ir.GetAttribute(IR::Attribute::PositionW))}; const IR::F32 rcp_position_w{ir.FPRecip(ir.GetAttribute(IR::Attribute::PositionW))};
value = ir.FPMul(value, rcp_position_w); value = ir.FPMul(value, rcp_position_w);
} }
} }
@ -68,7 +69,7 @@ void TranslatorVisitor::IPA(u64 insn) {
case InterpolationMode::Pass: case InterpolationMode::Pass:
break; break;
case InterpolationMode::Multiply: case InterpolationMode::Multiply:
value = ir.FPMul(value, ir.GetReg(ipa.multiplier)); value = ir.FPMul(value, F(ipa.multiplier));
break; break;
case InterpolationMode::Constant: case InterpolationMode::Constant:
throw NotImplementedException("IPA.CONSTANT"); throw NotImplementedException("IPA.CONSTANT");
@ -86,7 +87,7 @@ void TranslatorVisitor::IPA(u64 insn) {
value = ir.FPSaturate(value); value = ir.FPSaturate(value);
} }
ir.SetReg(ipa.dest_reg, value); F(ipa.dest_reg, value);
} }
} // namespace Shader::Maxwell } // namespace Shader::Maxwell

View file

@ -114,7 +114,7 @@ void TranslatorVisitor::LDG(u64 insn) {
} }
const IR::Value vector{ir.LoadGlobal64(address)}; const IR::Value vector{ir.LoadGlobal64(address)};
for (int i = 0; i < 2; ++i) { for (int i = 0; i < 2; ++i) {
X(dest_reg + i, ir.CompositeExtract(vector, i)); X(dest_reg + i, IR::U32{ir.CompositeExtract(vector, i)});
} }
break; break;
} }
@ -124,7 +124,7 @@ void TranslatorVisitor::LDG(u64 insn) {
} }
const IR::Value vector{ir.LoadGlobal128(address)}; const IR::Value vector{ir.LoadGlobal128(address)};
for (int i = 0; i < 4; ++i) { for (int i = 0; i < 4; ++i) {
X(dest_reg + i, ir.CompositeExtract(vector, i)); X(dest_reg + i, IR::U32{ir.CompositeExtract(vector, i)});
} }
break; break;
} }

View file

@ -5,6 +5,7 @@
#include <algorithm> #include <algorithm>
#include <type_traits> #include <type_traits>
#include "common/bit_cast.h"
#include "common/bit_util.h" #include "common/bit_util.h"
#include "shader_recompiler/exception.h" #include "shader_recompiler/exception.h"
#include "shader_recompiler/frontend/ir/microinstruction.h" #include "shader_recompiler/frontend/ir/microinstruction.h"
@ -25,6 +26,8 @@ template <typename T>
return value.U1(); return value.U1();
} else if constexpr (std::is_same_v<T, u32>) { } else if constexpr (std::is_same_v<T, u32>) {
return value.U32(); return value.U32();
} else if constexpr (std::is_same_v<T, f32>) {
return value.F32();
} else if constexpr (std::is_same_v<T, u64>) { } else if constexpr (std::is_same_v<T, u64>) {
return value.U64(); return value.U64();
} }
@ -115,6 +118,19 @@ void FoldLogicalAnd(IR::Inst& inst) {
} }
} }
/// Folds a bit cast instruction whose operand is either an immediate or the opposite bit cast.
///
/// @tparam Dest    Host type the immediate is reinterpreted to
/// @tparam Source  Host type the immediate operand is read as
/// @param inst     Bit cast instruction to fold
/// @param reverse  Opcode of the bit cast going in the opposite direction
///
/// - Immediate operand: uses of `inst` are replaced with the immediate bit-cast to Dest.
/// - Operand produced by `reverse`: uses of `inst` are replaced with that instruction's
///   first argument, as the two casts cancel each other out.
/// - Anything else is left untouched.
template <typename Dest, typename Source>
void FoldBitCast(IR::Inst& inst, IR::Opcode reverse) {
    const IR::Value value{inst.Arg(0)};
    if (value.IsImmediate()) {
        inst.ReplaceUsesWith(IR::Value{Common::BitCast<Dest>(Arg<Source>(value))});
        return;
    }
    // Look up the operand's instruction once and reuse it for the check and the replacement
    IR::Inst* const arg_inst{value.InstRecursive()};
    if (arg_inst->Opcode() == reverse) {
        inst.ReplaceUsesWith(arg_inst->Arg(0));
    }
}
void ConstantPropagation(IR::Inst& inst) { void ConstantPropagation(IR::Inst& inst) {
switch (inst.Opcode()) { switch (inst.Opcode()) {
case IR::Opcode::GetRegister: case IR::Opcode::GetRegister:
@ -123,6 +139,10 @@ void ConstantPropagation(IR::Inst& inst) {
return FoldGetPred(inst); return FoldGetPred(inst);
case IR::Opcode::IAdd32: case IR::Opcode::IAdd32:
return FoldAdd<u32>(inst); return FoldAdd<u32>(inst);
case IR::Opcode::BitCastF32U32:
return FoldBitCast<f32, u32>(inst, IR::Opcode::BitCastU32F32);
case IR::Opcode::BitCastU32F32:
return FoldBitCast<u32, f32>(inst, IR::Opcode::BitCastF32U32);
case IR::Opcode::IAdd64: case IR::Opcode::IAdd64:
return FoldAdd<u64>(inst); return FoldAdd<u64>(inst);
case IR::Opcode::BitFieldUExtract: case IR::Opcode::BitFieldUExtract:

View file

@ -108,8 +108,8 @@ bool MeetsBias(const StorageBufferAddr& storage_buffer, const Bias& bias) noexce
storage_buffer.offset < bias.offset_end; storage_buffer.offset < bias.offset_end;
} }
/// Ignores a global memory operation, reads return zero and writes are ignored /// Discards a global memory operation, reads return zero and writes are ignored
void IgnoreGlobalMemory(IR::Block& block, IR::Block::iterator inst) { void DiscardGlobalMemory(IR::Block& block, IR::Block::iterator inst) {
const IR::Value zero{u32{0}}; const IR::Value zero{u32{0}};
switch (inst->Opcode()) { switch (inst->Opcode()) {
case IR::Opcode::LoadGlobalS8: case IR::Opcode::LoadGlobalS8:
@ -120,12 +120,12 @@ void IgnoreGlobalMemory(IR::Block& block, IR::Block::iterator inst) {
inst->ReplaceUsesWith(zero); inst->ReplaceUsesWith(zero);
break; break;
case IR::Opcode::LoadGlobal64: case IR::Opcode::LoadGlobal64:
inst->ReplaceUsesWith( inst->ReplaceUsesWith(IR::Value{
IR::Value{&*block.PrependNewInst(inst, IR::Opcode::CompositeConstruct2, {zero, zero})}); &*block.PrependNewInst(inst, IR::Opcode::CompositeConstructU32x2, {zero, zero})});
break; break;
case IR::Opcode::LoadGlobal128: case IR::Opcode::LoadGlobal128:
inst->ReplaceUsesWith(IR::Value{&*block.PrependNewInst( inst->ReplaceUsesWith(IR::Value{&*block.PrependNewInst(
inst, IR::Opcode::CompositeConstruct4, {zero, zero, zero, zero})}); inst, IR::Opcode::CompositeConstructU32x4, {zero, zero, zero, zero})});
break; break;
case IR::Opcode::WriteGlobalS8: case IR::Opcode::WriteGlobalS8:
case IR::Opcode::WriteGlobalU8: case IR::Opcode::WriteGlobalU8:
@ -137,7 +137,8 @@ void IgnoreGlobalMemory(IR::Block& block, IR::Block::iterator inst) {
inst->Invalidate(); inst->Invalidate();
break; break;
default: default:
throw LogicError("Invalid opcode to ignore its global memory operation {}", inst->Opcode()); throw LogicError("Invalid opcode to discard its global memory operation {}",
inst->Opcode());
} }
} }
@ -196,7 +197,7 @@ void CollectStorageBuffers(IR::Block& block, IR::Block::iterator inst,
storage_buffer = Track(addr, nullptr); storage_buffer = Track(addr, nullptr);
if (!storage_buffer) { if (!storage_buffer) {
// If that also failed, drop the global memory usage // If that also failed, drop the global memory usage
IgnoreGlobalMemory(block, inst); DiscardGlobalMemory(block, inst);
} }
} }
// Collect storage buffer and the instruction // Collect storage buffer and the instruction
@ -242,12 +243,12 @@ std::optional<IR::U32> TrackLowAddress(IR::IREmitter& ir, IR::Inst* inst) {
if (vector.IsImmediate()) { if (vector.IsImmediate()) {
return std::nullopt; return std::nullopt;
} }
// This vector is expected to be a CompositeConstruct2 // This vector is expected to be a CompositeConstructU32x2
IR::Inst* const vector_inst{vector.InstRecursive()}; IR::Inst* const vector_inst{vector.InstRecursive()};
if (vector_inst->Opcode() != IR::Opcode::CompositeConstruct2) { if (vector_inst->Opcode() != IR::Opcode::CompositeConstructU32x2) {
return std::nullopt; return std::nullopt;
} }
// Grab the first argument from the CompositeConstruct2, this is the low address. // Grab the first argument from the CompositeConstructU32x2, this is the low address.
// Re-apply the offset in case we found one. // Re-apply the offset in case we found one.
const IR::U32 low_addr{vector_inst->Arg(0)}; const IR::U32 low_addr{vector_inst->Arg(0)};
return imm_offset != 0 ? IR::U32{ir.IAdd(low_addr, ir.Imm32(imm_offset))} : low_addr; return imm_offset != 0 ? IR::U32{ir.IAdd(low_addr, ir.Imm32(imm_offset))} : low_addr;

View file

@ -52,7 +52,7 @@ int main() {
// RunDatabase(); // RunDatabase();
// FileEnvironment env{"D:\\Shaders\\Database\\test.bin"}; // FileEnvironment env{"D:\\Shaders\\Database\\test.bin"};
FileEnvironment env{"D:\\Shaders\\Database\\Oninaki\\CS8F146B41DB6BD826.bin"}; FileEnvironment env{"D:\\Shaders\\Database\\Oninaki\\CS15C2FB1F0B965767.bin"};
auto cfg{std::make_unique<Flow::CFG>(env, 0)}; auto cfg{std::make_unique<Flow::CFG>(env, 0)};
// fmt::print(stdout, "{}\n", cfg->Dot()); // fmt::print(stdout, "{}\n", cfg->Dot());