Merge pull request #2485 from ReinUsesLisp/generic-memory
shader/memory: Implement generic memory stores and loads (ST and LD)
This commit is contained in:
commit
1a2d90ab09
|
@ -529,6 +529,11 @@ union Instruction {
|
||||||
BitField<39, 8, Register> gpr39;
|
BitField<39, 8, Register> gpr39;
|
||||||
BitField<48, 16, u64> opcode;
|
BitField<48, 16, u64> opcode;
|
||||||
|
|
||||||
|
// Address operands shared by the global/generic memory instructions.
// ShaderIR::TrackAndGetGlobalMemory reads both fields straight from the instruction:
// `gpr` is handed to GetRegister() to obtain the address register, and `offset` is
// converted with static_cast<u32> to form the immediate offset.
// NOTE(review): BitField arguments are presumably <first bit, bit count, value type> -
// confirm against the BitField template.
union {
|
||||||
|
// Index of the register used as the address register.
BitField<8, 8, Register> gpr;
|
||||||
|
// Immediate offset; declared as s64 here but narrowed to u32 where it is consumed.
BitField<20, 24, s64> offset;
|
||||||
|
} gmem;
|
||||||
|
|
||||||
union {
|
union {
|
||||||
BitField<20, 16, u64> imm20_16;
|
BitField<20, 16, u64> imm20_16;
|
||||||
BitField<20, 19, u64> imm20_19;
|
BitField<20, 19, u64> imm20_19;
|
||||||
|
@ -812,13 +817,11 @@ union Instruction {
|
||||||
union {
|
union {
|
||||||
BitField<48, 3, UniformType> type;
|
BitField<48, 3, UniformType> type;
|
||||||
BitField<46, 2, u64> cache_mode;
|
BitField<46, 2, u64> cache_mode;
|
||||||
BitField<20, 24, s64> immediate_offset;
|
|
||||||
} ldg;
|
} ldg;
|
||||||
|
|
||||||
union {
|
union {
|
||||||
BitField<48, 3, UniformType> type;
|
BitField<48, 3, UniformType> type;
|
||||||
BitField<46, 2, u64> cache_mode;
|
BitField<46, 2, u64> cache_mode;
|
||||||
BitField<20, 24, s64> immediate_offset;
|
|
||||||
} stg;
|
} stg;
|
||||||
|
|
||||||
union {
|
union {
|
||||||
|
@ -827,6 +830,11 @@ union Instruction {
|
||||||
BitField<20, 11, u64> address;
|
BitField<20, 11, u64> address;
|
||||||
} al2p;
|
} al2p;
|
||||||
|
|
||||||
|
// Fields specific to the generic memory instructions (OpCode::Id::LD and OpCode::Id::ST).
// ShaderIR::DecodeMemory reads them when the decoded opcode is LD or ST.
union {
|
||||||
|
// Access type of the load/store; DecodeMemory passes it to
// GetUniformTypeElementsCount() to get the number of elements to transfer.
BitField<53, 3, UniformType> type;
|
||||||
|
// DecodeMemory reports LD/ST as unimplemented when this bit is clear
// ("Unextended LD/ST is not implemented").
BitField<52, 1, u64> extended;
|
||||||
|
} generic;
|
||||||
|
|
||||||
union {
|
union {
|
||||||
BitField<0, 3, u64> pred0;
|
BitField<0, 3, u64> pred0;
|
||||||
BitField<3, 3, u64> pred3;
|
BitField<3, 3, u64> pred3;
|
||||||
|
@ -1387,10 +1395,12 @@ public:
|
||||||
LD_L,
|
LD_L,
|
||||||
LD_S,
|
LD_S,
|
||||||
LD_C,
|
LD_C,
|
||||||
|
LD, // Load from generic memory
|
||||||
|
LDG, // Load from global memory
|
||||||
ST_A,
|
ST_A,
|
||||||
ST_L,
|
ST_L,
|
||||||
ST_S,
|
ST_S,
|
||||||
LDG, // Load from global memory
|
ST, // Store in generic memory
|
||||||
STG, // Store in global memory
|
STG, // Store in global memory
|
||||||
AL2P, // Transforms attribute memory into physical memory
|
AL2P, // Transforms attribute memory into physical memory
|
||||||
TEX,
|
TEX,
|
||||||
|
@ -1658,10 +1668,12 @@ private:
|
||||||
INST("1110111101001---", Id::LD_S, Type::Memory, "LD_S"),
|
INST("1110111101001---", Id::LD_S, Type::Memory, "LD_S"),
|
||||||
INST("1110111101000---", Id::LD_L, Type::Memory, "LD_L"),
|
INST("1110111101000---", Id::LD_L, Type::Memory, "LD_L"),
|
||||||
INST("1110111110010---", Id::LD_C, Type::Memory, "LD_C"),
|
INST("1110111110010---", Id::LD_C, Type::Memory, "LD_C"),
|
||||||
|
INST("100-------------", Id::LD, Type::Memory, "LD"),
|
||||||
|
INST("1110111011010---", Id::LDG, Type::Memory, "LDG"),
|
||||||
INST("1110111111110---", Id::ST_A, Type::Memory, "ST_A"),
|
INST("1110111111110---", Id::ST_A, Type::Memory, "ST_A"),
|
||||||
INST("1110111101011---", Id::ST_S, Type::Memory, "ST_S"),
|
INST("1110111101011---", Id::ST_S, Type::Memory, "ST_S"),
|
||||||
INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"),
|
INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"),
|
||||||
INST("1110111011010---", Id::LDG, Type::Memory, "LDG"),
|
INST("101-------------", Id::ST, Type::Memory, "ST"),
|
||||||
INST("1110111011011---", Id::STG, Type::Memory, "STG"),
|
INST("1110111011011---", Id::STG, Type::Memory, "STG"),
|
||||||
INST("1110111110100---", Id::AL2P, Type::Memory, "AL2P"),
|
INST("1110111110100---", Id::AL2P, Type::Memory, "AL2P"),
|
||||||
INST("110000----111---", Id::TEX, Type::Texture, "TEX"),
|
INST("110000----111---", Id::TEX, Type::Texture, "TEX"),
|
||||||
|
|
|
@ -146,12 +146,25 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
case OpCode::Id::LD:
|
||||||
case OpCode::Id::LDG: {
|
case OpCode::Id::LDG: {
|
||||||
const auto [real_address_base, base_address, descriptor] =
|
const auto type = [instr, &opcode]() -> Tegra::Shader::UniformType {
|
||||||
TrackAndGetGlobalMemory(bb, GetRegister(instr.gpr8),
|
switch (opcode->get().GetId()) {
|
||||||
static_cast<u32>(instr.ldg.immediate_offset.Value()), false);
|
case OpCode::Id::LD:
|
||||||
|
UNIMPLEMENTED_IF_MSG(!instr.generic.extended, "Unextended LD is not implemented");
|
||||||
|
return instr.generic.type;
|
||||||
|
case OpCode::Id::LDG:
|
||||||
|
return instr.ldg.type;
|
||||||
|
default:
|
||||||
|
UNREACHABLE();
|
||||||
|
return {};
|
||||||
|
}
|
||||||
|
}();
|
||||||
|
|
||||||
const u32 count = GetUniformTypeElementsCount(instr.ldg.type);
|
const auto [real_address_base, base_address, descriptor] =
|
||||||
|
TrackAndGetGlobalMemory(bb, instr, false);
|
||||||
|
|
||||||
|
const u32 count = GetUniformTypeElementsCount(type);
|
||||||
for (u32 i = 0; i < count; ++i) {
|
for (u32 i = 0; i < count; ++i) {
|
||||||
const Node it_offset = Immediate(i * 4);
|
const Node it_offset = Immediate(i * 4);
|
||||||
const Node real_address =
|
const Node real_address =
|
||||||
|
@ -165,28 +178,6 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case OpCode::Id::STG: {
|
|
||||||
const auto [real_address_base, base_address, descriptor] =
|
|
||||||
TrackAndGetGlobalMemory(bb, GetRegister(instr.gpr8),
|
|
||||||
static_cast<u32>(instr.stg.immediate_offset.Value()), true);
|
|
||||||
|
|
||||||
// Encode in temporary registers like this: real_base_address, {registers_to_be_written...}
|
|
||||||
SetTemporal(bb, 0, real_address_base);
|
|
||||||
|
|
||||||
const u32 count = GetUniformTypeElementsCount(instr.stg.type);
|
|
||||||
for (u32 i = 0; i < count; ++i) {
|
|
||||||
SetTemporal(bb, i + 1, GetRegister(instr.gpr0.Value() + i));
|
|
||||||
}
|
|
||||||
for (u32 i = 0; i < count; ++i) {
|
|
||||||
const Node it_offset = Immediate(i * 4);
|
|
||||||
const Node real_address =
|
|
||||||
Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset);
|
|
||||||
const Node gmem = StoreNode(GmemNode(real_address, base_address, descriptor));
|
|
||||||
|
|
||||||
bb.push_back(Operation(OperationCode::Assign, gmem, GetTemporal(i + 1)));
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case OpCode::Id::ST_A: {
|
case OpCode::Id::ST_A: {
|
||||||
UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex,
|
UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex,
|
||||||
"Indirect attribute loads are not supported");
|
"Indirect attribute loads are not supported");
|
||||||
|
@ -242,6 +233,41 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
// Generic (ST) and global (STG) stores share one implementation; only the place the
// access type is read from differs between the two opcodes.
case OpCode::Id::ST:
|
||||||
|
case OpCode::Id::STG: {
|
||||||
|
// Immediately-invoked lambda that selects the UniformType for the decoded opcode.
const auto type = [instr, &opcode]() -> Tegra::Shader::UniformType {
|
||||||
|
switch (opcode->get().GetId()) {
|
||||||
|
case OpCode::Id::ST:
|
||||||
|
// Only the "extended" form of ST is handled; anything else is flagged as unimplemented.
UNIMPLEMENTED_IF_MSG(!instr.generic.extended, "Unextended ST is not implemented");
|
||||||
|
return instr.generic.type;
|
||||||
|
case OpCode::Id::STG:
|
||||||
|
return instr.stg.type;
|
||||||
|
default:
|
||||||
|
// The enclosing case labels only admit ST and STG.
UNREACHABLE();
|
||||||
|
return {};
|
||||||
|
}
|
||||||
|
}();
|
||||||
|
|
||||||
|
// Resolve the memory being written; the final argument (is_write) is true for stores.
const auto [real_address_base, base_address, descriptor] =
|
||||||
|
TrackAndGetGlobalMemory(bb, instr, true);
|
||||||
|
|
||||||
|
// Encode in temporary registers like this: real_base_address, {registers_to_be_written...}
|
||||||
|
// Temporal 0 receives the base address.
SetTemporal(bb, 0, real_address_base);
|
||||||
|
|
||||||
|
const u32 count = GetUniformTypeElementsCount(type);
|
||||||
|
// First pass: copy the source registers gpr0 .. gpr0 + count - 1 into temporals 1 .. count.
// NOTE(review): presumably staged before any store is emitted on purpose - confirm.
for (u32 i = 0; i < count; ++i) {
|
||||||
|
SetTemporal(bb, i + 1, GetRegister(instr.gpr0.Value() + i));
|
||||||
|
}
|
||||||
|
// Second pass: emit one assignment per element, at real_address_base + i * 4.
for (u32 i = 0; i < count; ++i) {
|
||||||
|
const Node it_offset = Immediate(i * 4);
|
||||||
|
const Node real_address =
|
||||||
|
Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset);
|
||||||
|
const Node gmem = StoreNode(GmemNode(real_address, base_address, descriptor));
|
||||||
|
|
||||||
|
// The stored value is read back from the temporal filled in the first pass.
bb.push_back(Operation(OperationCode::Assign, gmem, GetTemporal(i + 1)));
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
case OpCode::Id::AL2P: {
|
case OpCode::Id::AL2P: {
|
||||||
// Ignore al2p.direction since we don't care about it.
|
// Ignore al2p.direction since we don't care about it.
|
||||||
|
|
||||||
|
@ -265,9 +291,11 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
|
||||||
}
|
}
|
||||||
|
|
||||||
std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackAndGetGlobalMemory(NodeBlock& bb,
|
std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackAndGetGlobalMemory(NodeBlock& bb,
|
||||||
Node addr_register,
|
Instruction instr,
|
||||||
u32 immediate_offset,
|
|
||||||
bool is_write) {
|
bool is_write) {
|
||||||
|
const auto addr_register{GetRegister(instr.gmem.gpr)};
|
||||||
|
const auto immediate_offset{static_cast<u32>(instr.gmem.offset)};
|
||||||
|
|
||||||
const Node base_address{
|
const Node base_address{
|
||||||
TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size()))};
|
TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size()))};
|
||||||
const auto cbuf = std::get_if<CbufNode>(base_address);
|
const auto cbuf = std::get_if<CbufNode>(base_address);
|
||||||
|
|
|
@ -818,10 +818,8 @@ private:
|
||||||
std::pair<Node, s64> TrackRegister(const GprNode* tracked, const NodeBlock& code,
|
std::pair<Node, s64> TrackRegister(const GprNode* tracked, const NodeBlock& code,
|
||||||
s64 cursor) const;
|
s64 cursor) const;
|
||||||
|
|
||||||
std::tuple<Node, Node, GlobalMemoryBase> TrackAndGetGlobalMemory(NodeBlock& bb,
|
std::tuple<Node, Node, GlobalMemoryBase> TrackAndGetGlobalMemory(
|
||||||
Node addr_register,
|
NodeBlock& bb, Tegra::Shader::Instruction instr, bool is_write);
|
||||||
u32 immediate_offset,
|
|
||||||
bool is_write);
|
|
||||||
|
|
||||||
template <typename... T>
|
template <typename... T>
|
||||||
Node Operation(OperationCode code, const T*... operands) {
|
Node Operation(OperationCode code, const T*... operands) {
|
||||||
|
|
Loading…
Reference in New Issue