Pica: Add debug utilities for dumping shaders.
This commit is contained in:
parent
6ea003c7b5
commit
f37e39deb9
|
@ -2,6 +2,7 @@
|
||||||
// Licensed under GPLv2
|
// Licensed under GPLv2
|
||||||
// Refer to the license.txt file included.
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
|
#include <algorithm>
|
||||||
#include <fstream>
|
#include <fstream>
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|
||||||
|
@ -55,6 +56,210 @@ void GeometryDumper::Dump() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#pragma pack(1)
|
||||||
|
struct DVLBHeader {
|
||||||
|
enum : u32 {
|
||||||
|
MAGIC_WORD = 0x424C5644, // "DVLB"
|
||||||
|
};
|
||||||
|
|
||||||
|
u32 magic_word;
|
||||||
|
u32 num_programs;
|
||||||
|
// u32 dvle_offset_table[];
|
||||||
|
};
|
||||||
|
static_assert(sizeof(DVLBHeader) == 0x8, "Incorrect structure size");
|
||||||
|
|
||||||
|
struct DVLPHeader {
|
||||||
|
enum : u32 {
|
||||||
|
MAGIC_WORD = 0x504C5644, // "DVLP"
|
||||||
|
};
|
||||||
|
|
||||||
|
u32 magic_word;
|
||||||
|
u32 version;
|
||||||
|
u32 binary_offset; // relative to DVLP start
|
||||||
|
u32 binary_size_words;
|
||||||
|
u32 swizzle_patterns_offset;
|
||||||
|
u32 swizzle_patterns_num_entries;
|
||||||
|
u32 unk2;
|
||||||
|
};
|
||||||
|
static_assert(sizeof(DVLPHeader) == 0x1C, "Incorrect structure size");
|
||||||
|
|
||||||
|
struct DVLEHeader {
|
||||||
|
enum : u32 {
|
||||||
|
MAGIC_WORD = 0x454c5644, // "DVLE"
|
||||||
|
};
|
||||||
|
|
||||||
|
enum class ShaderType : u8 {
|
||||||
|
VERTEX = 0,
|
||||||
|
GEOMETRY = 1,
|
||||||
|
};
|
||||||
|
|
||||||
|
u32 magic_word;
|
||||||
|
u16 pad1;
|
||||||
|
ShaderType type;
|
||||||
|
u8 pad2;
|
||||||
|
u32 main_offset_words; // offset within binary blob
|
||||||
|
u32 endmain_offset_words;
|
||||||
|
u32 pad3;
|
||||||
|
u32 pad4;
|
||||||
|
u32 constant_table_offset;
|
||||||
|
u32 constant_table_size; // number of entries
|
||||||
|
u32 label_table_offset;
|
||||||
|
u32 label_table_size;
|
||||||
|
u32 output_register_table_offset;
|
||||||
|
u32 output_register_table_size;
|
||||||
|
u32 uniform_table_offset;
|
||||||
|
u32 uniform_table_size;
|
||||||
|
u32 symbol_table_offset;
|
||||||
|
u32 symbol_table_size;
|
||||||
|
|
||||||
|
};
|
||||||
|
static_assert(sizeof(DVLEHeader) == 0x40, "Incorrect structure size");
|
||||||
|
#pragma pack()
|
||||||
|
|
||||||
|
void DumpShader(const u32* binary_data, u32 binary_size, const u32* swizzle_data, u32 swizzle_size,
|
||||||
|
u32 main_offset, const Regs::VSOutputAttributes* output_attributes)
|
||||||
|
{
|
||||||
|
// NOTE: Permanently enabling this just trashes hard disks for no reason.
|
||||||
|
// Hence, this is currently disabled.
|
||||||
|
return;
|
||||||
|
|
||||||
|
struct StuffToWrite {
|
||||||
|
u8* pointer;
|
||||||
|
u32 size;
|
||||||
|
};
|
||||||
|
std::vector<StuffToWrite> writing_queue;
|
||||||
|
u32 write_offset = 0;
|
||||||
|
|
||||||
|
auto QueueForWriting = [&writing_queue,&write_offset](u8* pointer, u32 size) {
|
||||||
|
writing_queue.push_back({pointer, size});
|
||||||
|
u32 old_write_offset = write_offset;
|
||||||
|
write_offset += size;
|
||||||
|
return old_write_offset;
|
||||||
|
};
|
||||||
|
|
||||||
|
// First off, try to translate Pica state (one enum for output attribute type and component)
|
||||||
|
// into shbin format (separate type and component mask).
|
||||||
|
union OutputRegisterInfo {
|
||||||
|
enum Type : u64 {
|
||||||
|
POSITION = 0,
|
||||||
|
COLOR = 2,
|
||||||
|
TEXCOORD0 = 3,
|
||||||
|
TEXCOORD1 = 5,
|
||||||
|
TEXCOORD2 = 6,
|
||||||
|
};
|
||||||
|
|
||||||
|
BitField< 0, 64, u64> hex;
|
||||||
|
|
||||||
|
BitField< 0, 16, Type> type;
|
||||||
|
BitField<16, 16, u64> id;
|
||||||
|
BitField<32, 4, u64> component_mask;
|
||||||
|
};
|
||||||
|
|
||||||
|
// This is put into a try-catch block to make sure we notice unknown configurations.
|
||||||
|
std::vector<OutputRegisterInfo> output_info_table;
|
||||||
|
for (int i = 0; i < 7; ++i) {
|
||||||
|
using OutputAttributes = Pica::Regs::VSOutputAttributes;
|
||||||
|
|
||||||
|
// TODO: It's still unclear how the attribute components map to the register!
|
||||||
|
// Once we know that, this code probably will not make much sense anymore.
|
||||||
|
std::map<OutputAttributes::Semantic, std::pair<OutputRegisterInfo::Type, u32> > map = {
|
||||||
|
{ OutputAttributes::POSITION_X, { OutputRegisterInfo::POSITION, 1} },
|
||||||
|
{ OutputAttributes::POSITION_Y, { OutputRegisterInfo::POSITION, 2} },
|
||||||
|
{ OutputAttributes::POSITION_Z, { OutputRegisterInfo::POSITION, 4} },
|
||||||
|
{ OutputAttributes::POSITION_W, { OutputRegisterInfo::POSITION, 8} },
|
||||||
|
{ OutputAttributes::COLOR_R, { OutputRegisterInfo::COLOR, 1} },
|
||||||
|
{ OutputAttributes::COLOR_G, { OutputRegisterInfo::COLOR, 2} },
|
||||||
|
{ OutputAttributes::COLOR_B, { OutputRegisterInfo::COLOR, 4} },
|
||||||
|
{ OutputAttributes::COLOR_A, { OutputRegisterInfo::COLOR, 8} },
|
||||||
|
{ OutputAttributes::TEXCOORD0_U, { OutputRegisterInfo::TEXCOORD0, 1} },
|
||||||
|
{ OutputAttributes::TEXCOORD0_V, { OutputRegisterInfo::TEXCOORD0, 2} },
|
||||||
|
{ OutputAttributes::TEXCOORD1_U, { OutputRegisterInfo::TEXCOORD1, 1} },
|
||||||
|
{ OutputAttributes::TEXCOORD1_V, { OutputRegisterInfo::TEXCOORD1, 2} },
|
||||||
|
{ OutputAttributes::TEXCOORD2_U, { OutputRegisterInfo::TEXCOORD2, 1} },
|
||||||
|
{ OutputAttributes::TEXCOORD2_V, { OutputRegisterInfo::TEXCOORD2, 2} }
|
||||||
|
};
|
||||||
|
|
||||||
|
for (const auto& semantic : std::vector<OutputAttributes::Semantic>{
|
||||||
|
output_attributes[i].map_x,
|
||||||
|
output_attributes[i].map_y,
|
||||||
|
output_attributes[i].map_z,
|
||||||
|
output_attributes[i].map_w }) {
|
||||||
|
if (semantic == OutputAttributes::INVALID)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
try {
|
||||||
|
OutputRegisterInfo::Type type = map.at(semantic).first;
|
||||||
|
u32 component_mask = map.at(semantic).second;
|
||||||
|
|
||||||
|
auto it = std::find_if(output_info_table.begin(), output_info_table.end(),
|
||||||
|
[&i, &type](const OutputRegisterInfo& info) {
|
||||||
|
return info.id == i && info.type == type;
|
||||||
|
}
|
||||||
|
);
|
||||||
|
|
||||||
|
if (it == output_info_table.end()) {
|
||||||
|
output_info_table.push_back({});
|
||||||
|
output_info_table.back().type = type;
|
||||||
|
output_info_table.back().component_mask = component_mask;
|
||||||
|
output_info_table.back().id = i;
|
||||||
|
} else {
|
||||||
|
it->component_mask = it->component_mask | component_mask;
|
||||||
|
}
|
||||||
|
} catch (const std::out_of_range& oor) {
|
||||||
|
_dbg_assert_msg_(GPU, 0, "Unknown output attribute mapping");
|
||||||
|
ERROR_LOG(GPU, "Unknown output attribute mapping: %03x, %03x, %03x, %03x",
|
||||||
|
(int)output_attributes[i].map_x.Value(),
|
||||||
|
(int)output_attributes[i].map_y.Value(),
|
||||||
|
(int)output_attributes[i].map_z.Value(),
|
||||||
|
(int)output_attributes[i].map_w.Value());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
struct {
|
||||||
|
DVLBHeader header;
|
||||||
|
u32 dvle_offset;
|
||||||
|
} dvlb{ {DVLBHeader::MAGIC_WORD, 1 } }; // 1 DVLE
|
||||||
|
|
||||||
|
DVLPHeader dvlp{ DVLPHeader::MAGIC_WORD };
|
||||||
|
DVLEHeader dvle{ DVLEHeader::MAGIC_WORD };
|
||||||
|
|
||||||
|
QueueForWriting((u8*)&dvlb, sizeof(dvlb));
|
||||||
|
u32 dvlp_offset = QueueForWriting((u8*)&dvlp, sizeof(dvlp));
|
||||||
|
dvlb.dvle_offset = QueueForWriting((u8*)&dvle, sizeof(dvle));
|
||||||
|
|
||||||
|
// TODO: Reduce the amount of binary code written to relevant portions
|
||||||
|
dvlp.binary_offset = write_offset - dvlp_offset;
|
||||||
|
dvlp.binary_size_words = binary_size;
|
||||||
|
QueueForWriting((u8*)binary_data, binary_size * sizeof(u32));
|
||||||
|
|
||||||
|
dvlp.swizzle_patterns_offset = write_offset - dvlp_offset;
|
||||||
|
dvlp.swizzle_patterns_num_entries = swizzle_size;
|
||||||
|
u32 dummy = 0;
|
||||||
|
for (int i = 0; i < swizzle_size; ++i) {
|
||||||
|
QueueForWriting((u8*)&swizzle_data[i], sizeof(swizzle_data[i]));
|
||||||
|
QueueForWriting((u8*)&dummy, sizeof(dummy));
|
||||||
|
}
|
||||||
|
|
||||||
|
dvle.main_offset_words = main_offset;
|
||||||
|
dvle.output_register_table_offset = write_offset - dvlb.dvle_offset;
|
||||||
|
dvle.output_register_table_size = output_info_table.size();
|
||||||
|
QueueForWriting((u8*)output_info_table.data(), output_info_table.size() * sizeof(OutputRegisterInfo));
|
||||||
|
|
||||||
|
// TODO: Create a label table for "main"
|
||||||
|
|
||||||
|
|
||||||
|
// Write data to file
|
||||||
|
static int dump_index = 0;
|
||||||
|
std::string filename = std::string("shader_dump") + std::to_string(++dump_index) + std::string(".shbin");
|
||||||
|
std::ofstream file(filename, std::ios_base::out | std::ios_base::binary);
|
||||||
|
|
||||||
|
for (auto& chunk : writing_queue) {
|
||||||
|
file.write((char*)chunk.pointer, chunk.size);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
|
@ -35,6 +35,9 @@ private:
|
||||||
std::vector<Face> faces;
|
std::vector<Face> faces;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
void DumpShader(const u32* binary_data, u32 binary_size, const u32* swizzle_data, u32 swizzle_size,
|
||||||
|
u32 main_offset, const Regs::VSOutputAttributes* output_attributes);
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
|
@ -57,7 +57,7 @@ struct Regs {
|
||||||
|
|
||||||
INSERT_PADDING_WORDS(0x1);
|
INSERT_PADDING_WORDS(0x1);
|
||||||
|
|
||||||
union {
|
union VSOutputAttributes {
|
||||||
// Maps components of output vertex attributes to semantics
|
// Maps components of output vertex attributes to semantics
|
||||||
enum Semantic : u32
|
enum Semantic : u32
|
||||||
{
|
{
|
||||||
|
|
|
@ -4,6 +4,7 @@
|
||||||
|
|
||||||
#include "pica.h"
|
#include "pica.h"
|
||||||
#include "vertex_shader.h"
|
#include "vertex_shader.h"
|
||||||
|
#include "debug_utils/debug_utils.h"
|
||||||
#include <core/mem_map.h>
|
#include <core/mem_map.h>
|
||||||
#include <common/file_util.h>
|
#include <common/file_util.h>
|
||||||
|
|
||||||
|
@ -50,6 +51,11 @@ struct VertexShaderState {
|
||||||
};
|
};
|
||||||
u32 call_stack[8]; // TODO: What is the maximal call stack depth?
|
u32 call_stack[8]; // TODO: What is the maximal call stack depth?
|
||||||
u32* call_stack_pointer;
|
u32* call_stack_pointer;
|
||||||
|
|
||||||
|
struct {
|
||||||
|
u32 max_offset; // maximum program counter ever reached
|
||||||
|
u32 max_opdesc_id; // maximum swizzle pattern index ever used
|
||||||
|
} debug;
|
||||||
};
|
};
|
||||||
|
|
||||||
static void ProcessShaderCode(VertexShaderState& state) {
|
static void ProcessShaderCode(VertexShaderState& state) {
|
||||||
|
@ -57,6 +63,7 @@ static void ProcessShaderCode(VertexShaderState& state) {
|
||||||
bool increment_pc = true;
|
bool increment_pc = true;
|
||||||
bool exit_loop = false;
|
bool exit_loop = false;
|
||||||
const Instruction& instr = *(const Instruction*)state.program_counter;
|
const Instruction& instr = *(const Instruction*)state.program_counter;
|
||||||
|
state.debug.max_offset = std::max<u32>(state.debug.max_offset, 1 + (state.program_counter - shader_memory));
|
||||||
|
|
||||||
const float24* src1_ = (instr.common.src1 < 0x10) ? state.input_register_table[instr.common.src1]
|
const float24* src1_ = (instr.common.src1 < 0x10) ? state.input_register_table[instr.common.src1]
|
||||||
: (instr.common.src1 < 0x20) ? &state.temporary_registers[instr.common.src1-0x10].x
|
: (instr.common.src1 < 0x20) ? &state.temporary_registers[instr.common.src1-0x10].x
|
||||||
|
@ -88,6 +95,7 @@ static void ProcessShaderCode(VertexShaderState& state) {
|
||||||
switch (instr.opcode) {
|
switch (instr.opcode) {
|
||||||
case Instruction::OpCode::ADD:
|
case Instruction::OpCode::ADD:
|
||||||
{
|
{
|
||||||
|
state.debug.max_opdesc_id = std::max<u32>(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id);
|
||||||
for (int i = 0; i < 4; ++i) {
|
for (int i = 0; i < 4; ++i) {
|
||||||
if (!swizzle.DestComponentEnabled(i))
|
if (!swizzle.DestComponentEnabled(i))
|
||||||
continue;
|
continue;
|
||||||
|
@ -100,6 +108,7 @@ static void ProcessShaderCode(VertexShaderState& state) {
|
||||||
|
|
||||||
case Instruction::OpCode::MUL:
|
case Instruction::OpCode::MUL:
|
||||||
{
|
{
|
||||||
|
state.debug.max_opdesc_id = std::max<u32>(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id);
|
||||||
for (int i = 0; i < 4; ++i) {
|
for (int i = 0; i < 4; ++i) {
|
||||||
if (!swizzle.DestComponentEnabled(i))
|
if (!swizzle.DestComponentEnabled(i))
|
||||||
continue;
|
continue;
|
||||||
|
@ -113,6 +122,7 @@ static void ProcessShaderCode(VertexShaderState& state) {
|
||||||
case Instruction::OpCode::DP3:
|
case Instruction::OpCode::DP3:
|
||||||
case Instruction::OpCode::DP4:
|
case Instruction::OpCode::DP4:
|
||||||
{
|
{
|
||||||
|
state.debug.max_opdesc_id = std::max<u32>(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id);
|
||||||
float24 dot = float24::FromFloat32(0.f);
|
float24 dot = float24::FromFloat32(0.f);
|
||||||
int num_components = (instr.opcode == Instruction::OpCode::DP3) ? 3 : 4;
|
int num_components = (instr.opcode == Instruction::OpCode::DP3) ? 3 : 4;
|
||||||
for (int i = 0; i < num_components; ++i)
|
for (int i = 0; i < num_components; ++i)
|
||||||
|
@ -130,6 +140,7 @@ static void ProcessShaderCode(VertexShaderState& state) {
|
||||||
// Reciprocal
|
// Reciprocal
|
||||||
case Instruction::OpCode::RCP:
|
case Instruction::OpCode::RCP:
|
||||||
{
|
{
|
||||||
|
state.debug.max_opdesc_id = std::max<u32>(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id);
|
||||||
for (int i = 0; i < 4; ++i) {
|
for (int i = 0; i < 4; ++i) {
|
||||||
if (!swizzle.DestComponentEnabled(i))
|
if (!swizzle.DestComponentEnabled(i))
|
||||||
continue;
|
continue;
|
||||||
|
@ -145,6 +156,7 @@ static void ProcessShaderCode(VertexShaderState& state) {
|
||||||
// Reciprocal Square Root
|
// Reciprocal Square Root
|
||||||
case Instruction::OpCode::RSQ:
|
case Instruction::OpCode::RSQ:
|
||||||
{
|
{
|
||||||
|
state.debug.max_opdesc_id = std::max<u32>(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id);
|
||||||
for (int i = 0; i < 4; ++i) {
|
for (int i = 0; i < 4; ++i) {
|
||||||
if (!swizzle.DestComponentEnabled(i))
|
if (!swizzle.DestComponentEnabled(i))
|
||||||
continue;
|
continue;
|
||||||
|
@ -159,6 +171,7 @@ static void ProcessShaderCode(VertexShaderState& state) {
|
||||||
|
|
||||||
case Instruction::OpCode::MOV:
|
case Instruction::OpCode::MOV:
|
||||||
{
|
{
|
||||||
|
state.debug.max_opdesc_id = std::max<u32>(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id);
|
||||||
for (int i = 0; i < 4; ++i) {
|
for (int i = 0; i < 4; ++i) {
|
||||||
if (!swizzle.DestComponentEnabled(i))
|
if (!swizzle.DestComponentEnabled(i))
|
||||||
continue;
|
continue;
|
||||||
|
@ -212,6 +225,8 @@ OutputVertex RunShader(const InputVertex& input, int num_attributes)
|
||||||
|
|
||||||
const u32* main = &shader_memory[registers.vs_main_offset];
|
const u32* main = &shader_memory[registers.vs_main_offset];
|
||||||
state.program_counter = (u32*)main;
|
state.program_counter = (u32*)main;
|
||||||
|
state.debug.max_offset = 0;
|
||||||
|
state.debug.max_opdesc_id = 0;
|
||||||
|
|
||||||
// Setup input register table
|
// Setup input register table
|
||||||
const auto& attribute_register_map = registers.vs_input_register_map;
|
const auto& attribute_register_map = registers.vs_input_register_map;
|
||||||
|
@ -255,6 +270,9 @@ OutputVertex RunShader(const InputVertex& input, int num_attributes)
|
||||||
state.call_stack_pointer = &state.call_stack[0];
|
state.call_stack_pointer = &state.call_stack[0];
|
||||||
|
|
||||||
ProcessShaderCode(state);
|
ProcessShaderCode(state);
|
||||||
|
DebugUtils::DumpShader(shader_memory, state.debug.max_offset, swizzle_data,
|
||||||
|
state.debug.max_opdesc_id, registers.vs_main_offset,
|
||||||
|
registers.vs_output_attributes);
|
||||||
|
|
||||||
DEBUG_LOG(GPU, "Output vertex: pos (%.2f, %.2f, %.2f, %.2f), col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f)",
|
DEBUG_LOG(GPU, "Output vertex: pos (%.2f, %.2f, %.2f, %.2f), col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f)",
|
||||||
ret.pos.x.ToFloat32(), ret.pos.y.ToFloat32(), ret.pos.z.ToFloat32(), ret.pos.w.ToFloat32(),
|
ret.pos.x.ToFloat32(), ret.pos.y.ToFloat32(), ret.pos.z.ToFloat32(), ret.pos.w.ToFloat32(),
|
||||||
|
|
Loading…
Reference in a new issue