shader: Implement TEXS

This commit is contained in:
ReinUsesLisp 2021-03-15 04:54:43 -03:00 committed by ameerj
parent 71f96fa636
commit 17a82b56d7
8 changed files with 287 additions and 7 deletions

View file

@ -102,7 +102,8 @@ add_library(shader_recompiler STATIC
frontend/maxwell/translate/impl/predicate_set_predicate.cpp frontend/maxwell/translate/impl/predicate_set_predicate.cpp
frontend/maxwell/translate/impl/predicate_set_register.cpp frontend/maxwell/translate/impl/predicate_set_register.cpp
frontend/maxwell/translate/impl/select_source_with_predicate.cpp frontend/maxwell/translate/impl/select_source_with_predicate.cpp
frontend/maxwell/translate/impl/texture_sample.cpp frontend/maxwell/translate/impl/texture_fetch.cpp
frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp
frontend/maxwell/translate/translate.cpp frontend/maxwell/translate/translate.cpp
frontend/maxwell/translate/translate.h frontend/maxwell/translate/translate.h
ir_opt/collect_shader_info_pass.cpp ir_opt/collect_shader_info_pass.cpp

View file

@ -57,18 +57,27 @@ Id Texture(EmitContext& ctx, const IR::Value& index) {
throw NotImplementedException("Indirect texture sample"); throw NotImplementedException("Indirect texture sample");
} }
/// Tags `sample` with the RelaxedPrecision decoration when the texture flags stored on `inst`
/// have `relaxed_precision` set. The id is returned unchanged in either case.
Id Decorate(EmitContext& ctx, IR::Inst* inst, Id sample) {
    const auto flags{inst->Flags<IR::TextureInstInfo>()};
    if (flags.relaxed_precision == 0) {
        // Nothing to decorate
        return sample;
    }
    ctx.Decorate(sample, spv::Decoration::RelaxedPrecision);
    return sample;
}
template <typename MethodPtrType, typename... Args> template <typename MethodPtrType, typename... Args>
Id Emit(MethodPtrType sparse_ptr, MethodPtrType non_sparse_ptr, EmitContext& ctx, IR::Inst* inst, Id Emit(MethodPtrType sparse_ptr, MethodPtrType non_sparse_ptr, EmitContext& ctx, IR::Inst* inst,
Id result_type, Args&&... args) { Id result_type, Args&&... args) {
IR::Inst* const sparse{inst->GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)}; IR::Inst* const sparse{inst->GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)};
if (!sparse) { if (!sparse) {
return (ctx.*non_sparse_ptr)(result_type, std::forward<Args>(args)...); return Decorate(ctx, inst, (ctx.*non_sparse_ptr)(result_type, std::forward<Args>(args)...));
} }
const Id struct_type{ctx.TypeStruct(ctx.U32[1], result_type)}; const Id struct_type{ctx.TypeStruct(ctx.U32[1], result_type)};
const Id sample{(ctx.*sparse_ptr)(struct_type, std::forward<Args>(args)...)}; const Id sample{(ctx.*sparse_ptr)(struct_type, std::forward<Args>(args)...)};
const Id resident_code{ctx.OpCompositeExtract(ctx.U32[1], sample, 0U)}; const Id resident_code{ctx.OpCompositeExtract(ctx.U32[1], sample, 0U)};
sparse->SetDefinition(ctx.OpImageSparseTexelsResident(ctx.U1, resident_code)); sparse->SetDefinition(ctx.OpImageSparseTexelsResident(ctx.U1, resident_code));
sparse->Invalidate(); sparse->Invalidate();
Decorate(ctx, inst, sample);
return ctx.OpCompositeExtract(result_type, sample, 1U); return ctx.OpCompositeExtract(result_type, sample, 1U);
} }
} // Anonymous namespace } // Anonymous namespace

View file

@ -512,6 +512,14 @@ Value IREmitter::UnpackFloat2x16(const U32& value) {
return Inst(Opcode::UnpackFloat2x16, value); return Inst(Opcode::UnpackFloat2x16, value);
} }
/// Emits a PackHalf2x16 instruction taking `vector` as its operand and returns the U32 result.
U32 IREmitter::PackHalf2x16(const Value& vector) {
    const U32 packed{Inst<U32>(Opcode::PackHalf2x16, vector)};
    return packed;
}
/// Emits an UnpackHalf2x16 instruction taking `value` as its operand and returns the result.
Value IREmitter::UnpackHalf2x16(const U32& value) {
    const Value unpacked{Inst(Opcode::UnpackHalf2x16, value)};
    return unpacked;
}
F64 IREmitter::PackDouble2x32(const Value& vector) { F64 IREmitter::PackDouble2x32(const Value& vector) {
return Inst<F64>(Opcode::PackDouble2x32, vector); return Inst<F64>(Opcode::PackDouble2x32, vector);
} }

View file

@ -115,6 +115,9 @@ public:
[[nodiscard]] U32 PackFloat2x16(const Value& vector); [[nodiscard]] U32 PackFloat2x16(const Value& vector);
[[nodiscard]] Value UnpackFloat2x16(const U32& value); [[nodiscard]] Value UnpackFloat2x16(const U32& value);
/// Emits a PackHalf2x16 instruction on `vector`; the result is a U32.
[[nodiscard]] U32 PackHalf2x16(const Value& vector);
/// Emits an UnpackHalf2x16 instruction on `value`.
[[nodiscard]] Value UnpackHalf2x16(const U32& value);
[[nodiscard]] F64 PackDouble2x32(const Value& vector); [[nodiscard]] F64 PackDouble2x32(const Value& vector);
[[nodiscard]] Value UnpackDouble2x32(const F64& value); [[nodiscard]] Value UnpackDouble2x32(const F64& value);

View file

@ -36,7 +36,8 @@ union TextureInstInfo {
u32 raw; u32 raw;
BitField<0, 8, TextureType> type; BitField<0, 8, TextureType> type;
BitField<8, 1, u32> has_bias; BitField<8, 1, u32> has_bias;
BitField<16, 1, u32> has_lod_clamp; BitField<9, 1, u32> has_lod_clamp;
BitField<10, 1, u32> relaxed_precision;
}; };
static_assert(sizeof(TextureInstInfo) <= sizeof(u32)); static_assert(sizeof(TextureInstInfo) <= sizeof(u32));

View file

@ -553,10 +553,6 @@ void TranslatorVisitor::SYNC(u64) {
ThrowNotImplemented(Opcode::SYNC); ThrowNotImplemented(Opcode::SYNC);
} }
void TranslatorVisitor::TEXS(u64) {
ThrowNotImplemented(Opcode::TEXS);
}
void TranslatorVisitor::TLD(u64) { void TranslatorVisitor::TLD(u64) {
ThrowNotImplemented(Opcode::TLD); ThrowNotImplemented(Opcode::TLD);
} }

View file

@ -0,0 +1,262 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <array>
#include <utility>

#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/ir/modifiers.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
/// Value of the single-bit precision field (bit 59) of a TEXS instruction.
enum class Precision : u64 {
    F16 = 0, ///< Results are stored as packed half pairs (see Store16)
    F32 = 1, ///< Results are stored one float per register (see Store32)
};
/// Bit layout of a TEXS instruction word, listed from the lowest bit upwards.
union Encoding {
    // Must remain the first member: `Encoding{insn}` initializes the first union member
    u64 raw;

    BitField<0, 8, IR::Reg> dest_reg_a;   // First destination register
    BitField<8, 8, IR::Reg> src_reg_a;    // First source register
    BitField<20, 8, IR::Reg> src_reg_b;   // Second source register
    BitField<28, 8, IR::Reg> dest_reg_b;  // Second destination register (RZ selects the RG table)
    BitField<36, 13, u64> cbuf_offset;    // Emitted as the immediate texture handle
    BitField<49, 1, u64> nodep;           // Not read by the translation code in this file
    BitField<50, 3, u64> swizzle;         // Index into RG_LUT / RGBA_LUT
    BitField<53, 4, u64> encoding;        // Selects the texture type and sampling mode
    BitField<59, 1, Precision> precision; // F16 or F32 results
};
// Channel bits of a write mask: bit N set means component N of the sample is stored.
constexpr unsigned R{1U << 0};
constexpr unsigned G{1U << 1};
constexpr unsigned B{1U << 2};
constexpr unsigned A{1U << 3};

// Write masks indexed by the swizzle field when the second destination register is RZ.
constexpr std::array<unsigned, 8> RG_LUT{
    R,     // 0
    G,     // 1
    B,     // 2
    A,     // 3
    R | G, // 4
    R | A, // 5
    G | A, // 6
    B | A, // 7
};

// Write masks indexed by the swizzle field when the second destination register is in use.
// Only five encodings exist; larger indices are rejected by the caller.
constexpr std::array<unsigned, 5> RGBA_LUT{
    R | G | B,     // 0
    R | G | A,     // 1
    R | B | A,     // 2
    G | B | A,     // 3
    R | G | B | A, // 4
};
/// Throws NotImplementedException unless `reg` satisfies `alignment` according to IR::IsAligned.
void CheckAlignment(IR::Reg reg, int alignment) {
    if (IR::IsAligned(reg, alignment)) {
        return;
    }
    throw NotImplementedException("Unaligned source register {}", reg);
}
/// Reads every register in `registers` as a float and bundles the values into one composite.
template <typename... Regs>
IR::Value Composite(TranslatorVisitor& v, Regs... registers) {
    return v.ir.CompositeConstruct(v.F(registers)...);
}
/// Extracts the low 16 bits of `value` (offset 0, count 16) and converts the unsigned result to a
/// 32-bit float. Used to build the array-layer coordinate of array texture samples.
IR::F32 ReadArray(TranslatorVisitor& v, const IR::U32& value) {
    const IR::U32 bit_offset{v.ir.Imm32(0)};
    const IR::U32 bit_count{v.ir.Imm32(16)};
    const auto layer{v.ir.BitFieldExtract(value, bit_offset, bit_count)};
    return v.ir.ConvertUToF(32, layer);
}
/// Emits the image sample operation described by a TEXS instruction and returns its result.
///
/// The 4-bit `encoding` field selects the texture type and sampling mode (the mnemonic is noted
/// on each case). `cbuf_offset` is emitted as the immediate texture handle, and the F16 precision
/// bit sets `relaxed_precision` on the texture flags.
///
/// Throws NotImplementedException for an encoding above 13, or when a source register that is
/// read as a pair is not aligned to 2.
IR::Value Sample(TranslatorVisitor& v, u64 insn) {
    const Encoding op{insn};
    const IR::U32 texture{v.ir.Imm32(static_cast<u32>(op.cbuf_offset))};
    const IR::F32 lod_zero{v.ir.Imm32(0.0f)};
    const IR::Reg ra{op.src_reg_a};
    const IR::Reg rb{op.src_reg_b};

    IR::TextureInstInfo flags{};
    if (op.precision == Precision::F16) {
        flags.relaxed_precision.Assign(1);
    }

    // Coordinate builders shared between encodings. They are invoked where the original
    // expression stood, so the relative evaluation of call arguments is not altered.
    const auto split_coords{[&] { return Composite(v, ra, rb); }};
    const auto pair_coords{[&] { return Composite(v, ra, ra + 1); }};
    const auto volume_coords{[&] { return Composite(v, ra, ra + 1, rb); }};
    const auto array_coords{[&] {
        // The array layer lives in the low half of ra; x and y come from ra + 1 and rb
        return v.ir.CompositeConstruct(v.F(ra + 1), v.F(rb), ReadArray(v, v.X(ra)));
    }};

    switch (op.encoding) {
    case 0: // 1D.LZ
        flags.type.Assign(TextureType::Color1D);
        return v.ir.ImageSampleExplicitLod(texture, v.F(ra), lod_zero, {}, {}, flags);
    case 1: // 2D
        flags.type.Assign(TextureType::Color2D);
        return v.ir.ImageSampleImplicitLod(texture, split_coords(), {}, {}, {}, flags);
    case 2: // 2D.LZ
        flags.type.Assign(TextureType::Color2D);
        return v.ir.ImageSampleExplicitLod(texture, split_coords(), lod_zero, {}, {}, flags);
    case 3: // 2D.LL
        CheckAlignment(ra, 2);
        flags.type.Assign(TextureType::Color2D);
        return v.ir.ImageSampleExplicitLod(texture, pair_coords(), v.F(rb), {}, {}, flags);
    case 4: // 2D.DC
        CheckAlignment(ra, 2);
        flags.type.Assign(TextureType::Shadow2D);
        return v.ir.ImageSampleDrefImplicitLod(texture, pair_coords(), v.F(rb), {}, {}, {},
                                               flags);
    case 5: // 2D.LL.DC
        CheckAlignment(ra, 2);
        CheckAlignment(rb, 2);
        flags.type.Assign(TextureType::Shadow2D);
        return v.ir.ImageSampleDrefExplicitLod(texture, pair_coords(), v.F(rb + 1), v.F(rb), {},
                                               {}, flags);
    case 6: // 2D.LZ.DC
        CheckAlignment(ra, 2);
        flags.type.Assign(TextureType::Shadow2D);
        return v.ir.ImageSampleDrefExplicitLod(texture, pair_coords(), v.F(rb), lod_zero, {}, {},
                                               flags);
    case 7: // ARRAY_2D
        CheckAlignment(ra, 2);
        flags.type.Assign(TextureType::ColorArray2D);
        return v.ir.ImageSampleImplicitLod(texture, array_coords(), {}, {}, {}, flags);
    case 8: // ARRAY_2D.LZ
        CheckAlignment(ra, 2);
        flags.type.Assign(TextureType::ColorArray2D);
        return v.ir.ImageSampleExplicitLod(texture, array_coords(), lod_zero, {}, {}, flags);
    case 9: // ARRAY_2D.LZ.DC
        CheckAlignment(ra, 2);
        CheckAlignment(rb, 2);
        flags.type.Assign(TextureType::ShadowArray2D);
        return v.ir.ImageSampleDrefExplicitLod(texture, array_coords(), v.F(rb + 1), lod_zero, {},
                                               {}, flags);
    case 10: // 3D
        CheckAlignment(ra, 2);
        flags.type.Assign(TextureType::Color3D);
        return v.ir.ImageSampleImplicitLod(texture, volume_coords(), {}, {}, {}, flags);
    case 11: // 3D.LZ
        CheckAlignment(ra, 2);
        flags.type.Assign(TextureType::Color3D);
        return v.ir.ImageSampleExplicitLod(texture, volume_coords(), lod_zero, {}, {}, flags);
    case 12: // CUBE
        CheckAlignment(ra, 2);
        flags.type.Assign(TextureType::ColorCube);
        return v.ir.ImageSampleImplicitLod(texture, volume_coords(), {}, {}, {}, flags);
    case 13: // CUBE.LL
        CheckAlignment(ra, 2);
        CheckAlignment(rb, 2);
        flags.type.Assign(TextureType::ColorCube);
        return v.ir.ImageSampleExplicitLod(texture, volume_coords(), v.F(rb + 1), {}, {}, flags);
    default:
        throw NotImplementedException("Illegal encoding {}", op.encoding.Value());
    }
}
/// Decodes the component write mask (a combination of the R/G/B/A bits) of a TEXS instruction.
///
/// The swizzle field indexes RG_LUT when the second destination register is RZ, and RGBA_LUT
/// otherwise. Throws NotImplementedException when the index is past the end of the chosen table.
unsigned Swizzle(u64 insn) {
    const Encoding op{insn};
    const size_t index{op.swizzle};
    const bool single_dest{op.dest_reg_b == IR::Reg::RZ};
    if (single_dest) {
        if (index < RG_LUT.size()) {
            return RG_LUT[index];
        }
        throw NotImplementedException("Illegal RG encoding {}", index);
    }
    if (index < RGBA_LUT.size()) {
        return RGBA_LUT[index];
    }
    throw NotImplementedException("Illegal RGBA encoding {}", index);
}
/// Returns component `component` of a sample result as a float.
///
/// A result whose type is a single F32 is treated as a shadow (depth compare) sample: component 3
/// reads as the constant 1.0 and every other component reads as the scalar itself. Any other
/// result is treated as a composite and indexed directly.
IR::F32 Extract(TranslatorVisitor& v, const IR::Value& sample, unsigned component) {
    if (sample.Type() != IR::Type::F32) {
        return IR::F32{v.ir.CompositeExtract(sample, component)};
    }
    if (component == 3) {
        return v.ir.Imm32(1.0f);
    }
    return IR::F32{sample};
}
/// Maps the N-th stored 32-bit component to its destination register:
/// 0 -> dest_reg_a, 1 -> dest_reg_a + 1, 2 -> dest_reg_b, 3 -> dest_reg_b + 1.
///
/// The "+ 1" slots require the base register to be aligned to 2; CheckAlignment throws
/// NotImplementedException otherwise (its message says "source register" even though a
/// destination is being checked). Any index above 3 throws LogicError.
IR::Reg RegStoreComponent32(u64 insn, unsigned index) {
    if (index > 3) {
        throw LogicError("Invalid store index {}", index);
    }
    const Encoding op{insn};
    const IR::Reg first{op.dest_reg_a};
    const IR::Reg second{op.dest_reg_b};
    const IR::Reg base{index < 2 ? first : second};
    const bool upper_half_of_pair{(index % 2) == 1};
    if (!upper_half_of_pair) {
        return base;
    }
    CheckAlignment(base, 2);
    return base + 1;
}
/// Writes the components selected by the instruction's write mask as 32-bit floats, filling the
/// destination registers in the order given by RegStoreComponent32.
void Store32(TranslatorVisitor& v, u64 insn, const IR::Value& sample) {
    const unsigned mask{Swizzle(insn)};
    unsigned slot{0};
    for (unsigned channel = 0; channel < 4; ++channel) {
        const bool selected{(mask & (1U << channel)) != 0};
        if (selected) {
            // Resolve the destination first so an invalid register throws before any extract
            const IR::Reg target{RegStoreComponent32(insn, slot)};
            v.F(target, Extract(v, sample, channel));
            ++slot;
        }
    }
}
/// Builds the composite (lhs, rhs) and packs it into a single U32 with PackHalf2x16.
IR::U32 Pack(TranslatorVisitor& v, const IR::F32& lhs, const IR::F32& rhs) {
    const IR::Value pair{v.ir.CompositeConstruct(lhs, rhs)};
    return v.ir.PackHalf2x16(pair);
}
/// Writes the components selected by the instruction's write mask as packed half-float pairs.
///
/// The first two selected components go to dest_reg_a and the next two to dest_reg_b. When a
/// register receives only one component, its second half is packed with 0.0.
void Store16(TranslatorVisitor& v, u64 insn, const IR::Value& sample) {
    const unsigned mask{Swizzle(insn)};
    std::array<IR::F32, 4> selected;
    unsigned count{0};
    for (unsigned channel = 0; channel < 4; ++channel) {
        if ((mask & (1U << channel)) != 0) {
            selected[count] = Extract(v, sample, channel);
            ++count;
        }
    }
    const IR::F32 zero{v.ir.Imm32(0.0f)};
    const Encoding op{insn};
    if (count == 0) {
        // No component selected: nothing is written
        return;
    }
    if (count == 1) {
        v.X(op.dest_reg_a, Pack(v, selected[0], zero));
        return;
    }
    // Two or more components: the first register always holds a full pair
    v.X(op.dest_reg_a, Pack(v, selected[0], selected[1]));
    if (count == 3) {
        v.X(op.dest_reg_b, Pack(v, selected[2], zero));
    } else if (count == 4) {
        v.X(op.dest_reg_b, Pack(v, selected[2], selected[3]));
    }
}
} // Anonymous namespace
/// Translates TEXS: emits the texture sample, then stores the selected components either as
/// packed halves (F16 precision) or as full floats (F32 precision).
void TranslatorVisitor::TEXS(u64 insn) {
    const Encoding op{insn};
    const IR::Value texel{Sample(*this, insn)};
    // precision is a single bit, so anything that is not F16 is F32
    if (op.precision == Precision::F16) {
        Store16(*this, insn, texel);
        return;
    }
    Store32(*this, insn, texel);
}
} // namespace Shader::Maxwell