Merge pull request #1533 from FernandoS27/lmem
Implemented Shader Local Memory
This commit is contained in:
commit
d278f25bda
|
@ -208,6 +208,16 @@ enum class UniformType : u64 {
|
||||||
Double = 5,
|
Double = 5,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/// Values of the `type` bit field of the `ldst_sl` instruction encoding.
/// NOTE(review): the names suggest the width/signedness of the memory access - confirm
/// against hardware documentation. The LD_L and ST_L decompiler cases only handle Bytes32.
enum class StoreType : u64 {
|
||||||
|
Unsigned8 = 0,
|
||||||
|
Signed8 = 1,
|
||||||
|
Unsigned16 = 2,
|
||||||
|
Signed16 = 3,
|
||||||
|
Bytes32 = 4,
|
||||||
|
Bytes64 = 5,
|
||||||
|
Bytes128 = 6,
|
||||||
|
};
|
||||||
|
|
||||||
enum class IMinMaxExchange : u64 {
|
enum class IMinMaxExchange : u64 {
|
||||||
None = 0,
|
None = 0,
|
||||||
XLo = 1,
|
XLo = 1,
|
||||||
|
@ -747,6 +757,18 @@ union Instruction {
|
||||||
BitField<44, 2, u64> unknown;
|
BitField<44, 2, u64> unknown;
|
||||||
} ld_c;
|
} ld_c;
|
||||||
|
|
||||||
|
/// Encoding view read by both the LD_L and ST_L decompiler cases; `type` selects the
/// StoreType of the access.
union {
|
||||||
|
BitField<48, 3, StoreType> type;
|
||||||
|
} ldst_sl;
|
||||||
|
|
||||||
|
/// Encoding view for LD_L. The meaning of `unknown` has not been identified; the LD_L
/// decompiler case treats every value other than 1 as unhandled.
union {
|
||||||
|
BitField<44, 2, u64> unknown;
|
||||||
|
} ld_l;
|
||||||
|
|
||||||
|
/// Encoding view for ST_L. The meaning of `unknown` has not been identified; the ST_L
/// decompiler case treats every value other than 0 as unhandled.
union {
|
||||||
|
BitField<44, 2, u64> unknown;
|
||||||
|
} st_l;
|
||||||
|
|
||||||
union {
|
union {
|
||||||
BitField<0, 3, u64> pred0;
|
BitField<0, 3, u64> pred0;
|
||||||
BitField<3, 3, u64> pred3;
|
BitField<3, 3, u64> pred3;
|
||||||
|
@ -1209,6 +1231,7 @@ union Instruction {
|
||||||
BitField<61, 1, u64> is_b_imm;
|
BitField<61, 1, u64> is_b_imm;
|
||||||
BitField<60, 1, u64> is_b_gpr;
|
BitField<60, 1, u64> is_b_gpr;
|
||||||
BitField<59, 1, u64> is_c_gpr;
|
BitField<59, 1, u64> is_c_gpr;
|
||||||
|
BitField<20, 24, s64> smem_imm;
|
||||||
|
|
||||||
Attribute attribute;
|
Attribute attribute;
|
||||||
Sampler sampler;
|
Sampler sampler;
|
||||||
|
@ -1232,8 +1255,12 @@ public:
|
||||||
BRA,
|
BRA,
|
||||||
PBK,
|
PBK,
|
||||||
LD_A,
|
LD_A,
|
||||||
|
LD_L,
|
||||||
|
LD_S,
|
||||||
LD_C,
|
LD_C,
|
||||||
ST_A,
|
ST_A,
|
||||||
|
ST_L,
|
||||||
|
ST_S,
|
||||||
LDG, // Load from global memory
|
LDG, // Load from global memory
|
||||||
STG, // Store in global memory
|
STG, // Store in global memory
|
||||||
TEX,
|
TEX,
|
||||||
|
@ -1490,8 +1517,12 @@ private:
|
||||||
INST("111000110100---", Id::BRK, Type::Flow, "BRK"),
|
INST("111000110100---", Id::BRK, Type::Flow, "BRK"),
|
||||||
INST("1111000011110---", Id::DEPBAR, Type::Synch, "DEPBAR"),
|
INST("1111000011110---", Id::DEPBAR, Type::Synch, "DEPBAR"),
|
||||||
INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"),
|
INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"),
|
||||||
|
INST("1110111101001---", Id::LD_S, Type::Memory, "LD_S"),
|
||||||
|
INST("1110111101000---", Id::LD_L, Type::Memory, "LD_L"),
|
||||||
INST("1110111110010---", Id::LD_C, Type::Memory, "LD_C"),
|
INST("1110111110010---", Id::LD_C, Type::Memory, "LD_C"),
|
||||||
INST("1110111111110---", Id::ST_A, Type::Memory, "ST_A"),
|
INST("1110111111110---", Id::ST_A, Type::Memory, "ST_A"),
|
||||||
|
INST("1110111101011---", Id::ST_S, Type::Memory, "ST_S"),
|
||||||
|
INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"),
|
||||||
INST("1110111011010---", Id::LDG, Type::Memory, "LDG"),
|
INST("1110111011010---", Id::LDG, Type::Memory, "LDG"),
|
||||||
INST("1110111011011---", Id::STG, Type::Memory, "STG"),
|
INST("1110111011011---", Id::STG, Type::Memory, "STG"),
|
||||||
INST("110000----111---", Id::TEX, Type::Memory, "TEX"),
|
INST("110000----111---", Id::TEX, Type::Memory, "TEX"),
|
||||||
|
|
|
@ -96,6 +96,11 @@ struct Header {
|
||||||
}
|
}
|
||||||
} ps;
|
} ps;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/// Returns the shader local memory size assembled from the header: the high size field of
/// common2 shifted left by 24 bits, OR'd with the low size field of common1.
/// NOTE(review): the value is presumably a byte count - confirm against the consumer.
u64 GetLocalMemorySize() {
    return (common2.shader_local_memory_high_size << 24) |
           common1.shader_local_memory_low_size;
}
|
||||||
};
|
};
|
||||||
|
|
||||||
static_assert(sizeof(Header) == 0x50, "Incorrect structure size");
|
static_assert(sizeof(Header) == 0x50, "Incorrect structure size");
|
||||||
|
|
|
@ -278,7 +278,7 @@ public:
|
||||||
const Maxwell3D::Regs::ShaderStage& stage, const std::string& suffix,
|
const Maxwell3D::Regs::ShaderStage& stage, const std::string& suffix,
|
||||||
const Tegra::Shader::Header& header)
|
const Tegra::Shader::Header& header)
|
||||||
: shader{shader}, declarations{declarations}, stage{stage}, suffix{suffix}, header{header},
|
: shader{shader}, declarations{declarations}, stage{stage}, suffix{suffix}, header{header},
|
||||||
fixed_pipeline_output_attributes_used{} {
|
fixed_pipeline_output_attributes_used{}, local_memory_size{0} {
|
||||||
BuildRegisterList();
|
BuildRegisterList();
|
||||||
BuildInputList();
|
BuildInputList();
|
||||||
}
|
}
|
||||||
|
@ -436,6 +436,25 @@ public:
|
||||||
shader.AddLine(dest + " = " + src + ';');
|
shader.AddLine(dest + " = " + src + ';');
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns the GLSL expression naming the `lmem` array element selected by `index`.
/// @param index GLSL expression used as the array subscript.
std::string GetLocalMemoryAsFloat(const std::string& index) {
    std::string element{"lmem["};
    element += index;
    element += ']';
    return element;
}
|
||||||
|
|
||||||
|
/// Returns a GLSL expression that reads the `lmem` element selected by `index` and
/// reinterprets its bits as an integer.
/// @param index GLSL expression used as the array subscript.
/// @param is_signed When true the bits are reinterpreted as a signed integer
///                  (floatBitsToInt), otherwise as an unsigned integer (floatBitsToUint).
/// @returns The GLSL expression as a string.
std::string GetLocalMemoryAsInteger(const std::string& index, bool is_signed = false) {
    // The GLSL built-in is spelled floatBitsToInt; "floatToIntBits" does not exist in GLSL
    // and would make the generated shader fail to compile.
    const std::string func{is_signed ? "floatBitsToInt" : "floatBitsToUint"};
    return func + "(lmem[" + index + "])";
}
|
||||||
|
|
||||||
|
void SetLocalMemoryAsFloat(const std::string& index, const std::string& value) {
|
||||||
|
shader.AddLine("lmem[" + index + "] = " + value + ';');
|
||||||
|
}
|
||||||
|
|
||||||
|
void SetLocalMemoryAsInteger(const std::string& index, const std::string& value,
|
||||||
|
bool is_signed = false) {
|
||||||
|
const std::string func{is_signed ? "intBitsToFloat" : "uintBitsToFloat"};
|
||||||
|
shader.AddLine("lmem[" + index + "] = " + func + '(' + value + ");");
|
||||||
|
}
|
||||||
|
|
||||||
std::string GetControlCode(const Tegra::Shader::ControlCode cc) const {
|
std::string GetControlCode(const Tegra::Shader::ControlCode cc) const {
|
||||||
switch (cc) {
|
switch (cc) {
|
||||||
case Tegra::Shader::ControlCode::NEU:
|
case Tegra::Shader::ControlCode::NEU:
|
||||||
|
@ -533,6 +552,7 @@ public:
|
||||||
void GenerateDeclarations(const std::string& suffix) {
|
void GenerateDeclarations(const std::string& suffix) {
|
||||||
GenerateVertex();
|
GenerateVertex();
|
||||||
GenerateRegisters(suffix);
|
GenerateRegisters(suffix);
|
||||||
|
GenerateLocalMemory();
|
||||||
GenerateInternalFlags();
|
GenerateInternalFlags();
|
||||||
GenerateInputAttrs();
|
GenerateInputAttrs();
|
||||||
GenerateOutputAttrs();
|
GenerateOutputAttrs();
|
||||||
|
@ -578,6 +598,10 @@ public:
|
||||||
return entry.GetName();
|
return entry.GetName();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Records the local memory size that GenerateLocalMemory() reads when it declares `lmem`.
/// @param lmem Local memory size; zero means no local memory array is declared.
void SetLocalMemory(u64 lmem) {
    this->local_memory_size = lmem;
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
/// Generates declarations for registers.
|
/// Generates declarations for registers.
|
||||||
void GenerateRegisters(const std::string& suffix) {
|
void GenerateRegisters(const std::string& suffix) {
|
||||||
|
@ -588,6 +612,15 @@ private:
|
||||||
declarations.AddNewLine();
|
declarations.AddNewLine();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Generates declarations for local memory.
|
||||||
|
void GenerateLocalMemory() {
|
||||||
|
if (local_memory_size > 0) {
|
||||||
|
declarations.AddLine("float lmem[" + std::to_string((local_memory_size - 1 + 4) / 4) +
|
||||||
|
"];");
|
||||||
|
declarations.AddNewLine();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Generates declarations for internal flags.
|
/// Generates declarations for internal flags.
|
||||||
void GenerateInternalFlags() {
|
void GenerateInternalFlags() {
|
||||||
for (u32 ii = 0; ii < static_cast<u64>(InternalFlag::Amount); ii++) {
|
for (u32 ii = 0; ii < static_cast<u64>(InternalFlag::Amount); ii++) {
|
||||||
|
@ -895,6 +928,7 @@ private:
|
||||||
const std::string& suffix;
|
const std::string& suffix;
|
||||||
const Tegra::Shader::Header& header;
|
const Tegra::Shader::Header& header;
|
||||||
std::unordered_set<Attribute::Index> fixed_pipeline_output_attributes_used;
|
std::unordered_set<Attribute::Index> fixed_pipeline_output_attributes_used;
|
||||||
|
u64 local_memory_size;
|
||||||
};
|
};
|
||||||
|
|
||||||
class GLSLGenerator {
|
class GLSLGenerator {
|
||||||
|
@ -904,6 +938,8 @@ public:
|
||||||
: subroutines(subroutines), program_code(program_code), main_offset(main_offset),
|
: subroutines(subroutines), program_code(program_code), main_offset(main_offset),
|
||||||
stage(stage), suffix(suffix) {
|
stage(stage), suffix(suffix) {
|
||||||
std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header));
|
std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header));
|
||||||
|
local_memory_size = header.GetLocalMemorySize();
|
||||||
|
regs.SetLocalMemory(local_memory_size);
|
||||||
Generate(suffix);
|
Generate(suffix);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2324,6 +2360,39 @@ private:
|
||||||
shader.AddLine("}");
|
shader.AddLine("}");
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
case OpCode::Id::LD_L: {
|
||||||
|
// Add an extra scope and declare the index register inside to prevent
|
||||||
|
// overwriting it in case it is used as an output of the LD instruction.
|
||||||
|
shader.AddLine('{');
|
||||||
|
++shader.scope;
|
||||||
|
|
||||||
|
std::string op = '(' + regs.GetRegisterAsInteger(instr.gpr8, 0, false) + " + " +
|
||||||
|
std::to_string(instr.smem_imm.Value()) + ')';
|
||||||
|
|
||||||
|
shader.AddLine("uint index = (" + op + " / 4);");
|
||||||
|
|
||||||
|
const std::string op_a = regs.GetLocalMemoryAsFloat("index");
|
||||||
|
|
||||||
|
if (instr.ld_l.unknown != 1) {
|
||||||
|
LOG_CRITICAL(HW_GPU, "LD_L Unhandled mode: {}",
|
||||||
|
static_cast<unsigned>(instr.ld_l.unknown.Value()));
|
||||||
|
UNREACHABLE();
|
||||||
|
}
|
||||||
|
|
||||||
|
switch (instr.ldst_sl.type.Value()) {
|
||||||
|
case Tegra::Shader::StoreType::Bytes32:
|
||||||
|
regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
LOG_CRITICAL(HW_GPU, "LD_L Unhandled type: {}",
|
||||||
|
static_cast<unsigned>(instr.ldst_sl.type.Value()));
|
||||||
|
UNREACHABLE();
|
||||||
|
}
|
||||||
|
|
||||||
|
--shader.scope;
|
||||||
|
shader.AddLine('}');
|
||||||
|
break;
|
||||||
|
}
|
||||||
case OpCode::Id::ST_A: {
|
case OpCode::Id::ST_A: {
|
||||||
ASSERT_MSG(instr.gpr8.Value() == Register::ZeroIndex,
|
ASSERT_MSG(instr.gpr8.Value() == Register::ZeroIndex,
|
||||||
"Indirect attribute loads are not supported");
|
"Indirect attribute loads are not supported");
|
||||||
|
@ -2352,6 +2421,37 @@ private:
|
||||||
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
case OpCode::Id::ST_L: {
|
||||||
|
// Add an extra scope and declare the index variable inside so that the
|
||||||
|
// declaration stays local to the code emitted for this ST instruction.
|
||||||
|
shader.AddLine('{');
|
||||||
|
++shader.scope;
|
||||||
|
|
||||||
|
std::string op = '(' + regs.GetRegisterAsInteger(instr.gpr8, 0, false) + " + " +
|
||||||
|
std::to_string(instr.smem_imm.Value()) + ')';
|
||||||
|
|
||||||
|
shader.AddLine("uint index = (" + op + " / 4);");
|
||||||
|
|
||||||
|
if (instr.st_l.unknown != 0) {
|
||||||
|
LOG_CRITICAL(HW_GPU, "ST_L Unhandled mode: {}",
|
||||||
|
static_cast<unsigned>(instr.st_l.unknown.Value()));
|
||||||
|
UNREACHABLE();
|
||||||
|
}
|
||||||
|
|
||||||
|
switch (instr.ldst_sl.type.Value()) {
|
||||||
|
case Tegra::Shader::StoreType::Bytes32:
|
||||||
|
regs.SetLocalMemoryAsFloat("index", regs.GetRegisterAsFloat(instr.gpr0));
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
LOG_CRITICAL(HW_GPU, "ST_L Unhandled type: {}",
|
||||||
|
static_cast<unsigned>(instr.ldst_sl.type.Value()));
|
||||||
|
UNREACHABLE();
|
||||||
|
}
|
||||||
|
|
||||||
|
--shader.scope;
|
||||||
|
shader.AddLine('}');
|
||||||
|
break;
|
||||||
|
}
|
||||||
case OpCode::Id::TEX: {
|
case OpCode::Id::TEX: {
|
||||||
Tegra::Shader::TextureType texture_type{instr.tex.texture_type};
|
Tegra::Shader::TextureType texture_type{instr.tex.texture_type};
|
||||||
std::string coord;
|
std::string coord;
|
||||||
|
@ -3575,6 +3675,7 @@ private:
|
||||||
const u32 main_offset;
|
const u32 main_offset;
|
||||||
Maxwell3D::Regs::ShaderStage stage;
|
Maxwell3D::Regs::ShaderStage stage;
|
||||||
const std::string& suffix;
|
const std::string& suffix;
|
||||||
|
u64 local_memory_size;
|
||||||
|
|
||||||
ShaderWriter shader;
|
ShaderWriter shader;
|
||||||
ShaderWriter declarations;
|
ShaderWriter declarations;
|
||||||
|
|
Loading…
Reference in a new issue