shader_ir/memory: Implement u16 and u8 for STG and LDG
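Implement u16 loads for LDG using the same technique already used for u8: load the containing 32-bit word and extract the addressed bits from it. For STG, load the containing 32-bit word from memory, insert the value being stored into it with bitfieldInsert, and then write the combined word back.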
This commit is contained in:
parent
80436c1330
commit
e2a2a556b9
|
@ -6,6 +6,7 @@
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <fmt/format.h>
|
#include <fmt/format.h>
|
||||||
|
|
||||||
|
#include "common/alignment.h"
|
||||||
#include "common/assert.h"
|
#include "common/assert.h"
|
||||||
#include "common/common_types.h"
|
#include "common/common_types.h"
|
||||||
#include "common/logging/log.h"
|
#include "common/logging/log.h"
|
||||||
|
@ -22,34 +23,39 @@ using Tegra::Shader::Register;
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
u32 GetLdgMemorySize(Tegra::Shader::UniformType uniform_type) {
|
bool IsUnaligned(Tegra::Shader::UniformType uniform_type) {
|
||||||
|
return uniform_type == Tegra::Shader::UniformType::UnsignedByte ||
|
||||||
|
uniform_type == Tegra::Shader::UniformType::UnsignedShort;
|
||||||
|
}
|
||||||
|
|
||||||
|
u32 GetUnalignedMask(Tegra::Shader::UniformType uniform_type) {
|
||||||
switch (uniform_type) {
|
switch (uniform_type) {
|
||||||
case Tegra::Shader::UniformType::UnsignedByte:
|
case Tegra::Shader::UniformType::UnsignedByte:
|
||||||
case Tegra::Shader::UniformType::Single:
|
return 0b11;
|
||||||
return 1;
|
case Tegra::Shader::UniformType::UnsignedShort:
|
||||||
case Tegra::Shader::UniformType::Double:
|
return 0b10;
|
||||||
return 2;
|
|
||||||
case Tegra::Shader::UniformType::Quad:
|
|
||||||
case Tegra::Shader::UniformType::UnsignedQuad:
|
|
||||||
return 4;
|
|
||||||
default:
|
default:
|
||||||
UNIMPLEMENTED_MSG("Unimplemented size={}!", static_cast<u32>(uniform_type));
|
UNREACHABLE();
|
||||||
return 1;
|
return 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
u32 GetStgMemorySize(Tegra::Shader::UniformType uniform_type) {
|
u32 GetMemorySize(Tegra::Shader::UniformType uniform_type) {
|
||||||
switch (uniform_type) {
|
switch (uniform_type) {
|
||||||
|
case Tegra::Shader::UniformType::UnsignedByte:
|
||||||
|
return 8;
|
||||||
|
case Tegra::Shader::UniformType::UnsignedShort:
|
||||||
|
return 16;
|
||||||
case Tegra::Shader::UniformType::Single:
|
case Tegra::Shader::UniformType::Single:
|
||||||
return 1;
|
return 32;
|
||||||
case Tegra::Shader::UniformType::Double:
|
case Tegra::Shader::UniformType::Double:
|
||||||
return 2;
|
return 64;
|
||||||
case Tegra::Shader::UniformType::Quad:
|
case Tegra::Shader::UniformType::Quad:
|
||||||
case Tegra::Shader::UniformType::UnsignedQuad:
|
case Tegra::Shader::UniformType::UnsignedQuad:
|
||||||
return 4;
|
return 128;
|
||||||
default:
|
default:
|
||||||
UNIMPLEMENTED_MSG("Unimplemented size={}!", static_cast<u32>(uniform_type));
|
UNIMPLEMENTED_MSG("Unimplemented size={}!", static_cast<u32>(uniform_type));
|
||||||
return 1;
|
return 32;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -184,9 +190,10 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
|
||||||
}();
|
}();
|
||||||
|
|
||||||
const auto [real_address_base, base_address, descriptor] =
|
const auto [real_address_base, base_address, descriptor] =
|
||||||
TrackGlobalMemory(bb, instr, false);
|
TrackGlobalMemory(bb, instr, true, false);
|
||||||
|
|
||||||
const u32 count = GetLdgMemorySize(type);
|
const u32 size = GetMemorySize(type);
|
||||||
|
const u32 count = Common::AlignUp(size, 32) / 32;
|
||||||
if (!real_address_base || !base_address) {
|
if (!real_address_base || !base_address) {
|
||||||
// Tracking failed, load zeroes.
|
// Tracking failed, load zeroes.
|
||||||
for (u32 i = 0; i < count; ++i) {
|
for (u32 i = 0; i < count; ++i) {
|
||||||
|
@ -200,14 +207,15 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
|
||||||
const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset);
|
const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset);
|
||||||
Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
|
Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
|
||||||
|
|
||||||
if (type == Tegra::Shader::UniformType::UnsignedByte) {
|
// To handle unaligned loads get the bytes used to dereference global memory and extract
|
||||||
// To handle unaligned loads get the byte used to dereferenced global memory
|
// those bytes from the loaded u32.
|
||||||
// and extract that byte from the loaded uint32.
|
if (IsUnaligned(type)) {
|
||||||
Node byte = Operation(OperationCode::UBitwiseAnd, real_address, Immediate(3));
|
Node mask = Immediate(GetUnalignedMask(type));
|
||||||
byte = Operation(OperationCode::ULogicalShiftLeft, std::move(byte), Immediate(3));
|
Node offset = Operation(OperationCode::UBitwiseAnd, real_address, std::move(mask));
|
||||||
|
offset = Operation(OperationCode::ULogicalShiftLeft, offset, Immediate(3));
|
||||||
|
|
||||||
gmem = Operation(OperationCode::UBitfieldExtract, std::move(gmem), std::move(byte),
|
gmem = Operation(OperationCode::UBitfieldExtract, std::move(gmem),
|
||||||
Immediate(8));
|
std::move(offset), Immediate(size));
|
||||||
}
|
}
|
||||||
|
|
||||||
SetTemporary(bb, i, gmem);
|
SetTemporary(bb, i, gmem);
|
||||||
|
@ -295,19 +303,32 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
|
||||||
}
|
}
|
||||||
}();
|
}();
|
||||||
|
|
||||||
|
// For unaligned (u8/u16) stores we have to read memory too, since the value is inserted into the loaded word.
|
||||||
|
const bool is_read = IsUnaligned(type);
|
||||||
const auto [real_address_base, base_address, descriptor] =
|
const auto [real_address_base, base_address, descriptor] =
|
||||||
TrackGlobalMemory(bb, instr, true);
|
TrackGlobalMemory(bb, instr, is_read, true);
|
||||||
if (!real_address_base || !base_address) {
|
if (!real_address_base || !base_address) {
|
||||||
// Tracking failed, skip the store.
|
// Tracking failed, skip the store.
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
const u32 count = GetStgMemorySize(type);
|
const u32 size = GetMemorySize(type);
|
||||||
|
const u32 count = Common::AlignUp(size, 32) / 32;
|
||||||
for (u32 i = 0; i < count; ++i) {
|
for (u32 i = 0; i < count; ++i) {
|
||||||
const Node it_offset = Immediate(i * 4);
|
const Node it_offset = Immediate(i * 4);
|
||||||
const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset);
|
const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset);
|
||||||
const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
|
const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
|
||||||
const Node value = GetRegister(instr.gpr0.Value() + i);
|
Node value = GetRegister(instr.gpr0.Value() + i);
|
||||||
|
|
||||||
|
if (IsUnaligned(type)) {
|
||||||
|
Node mask = Immediate(GetUnalignedMask(type));
|
||||||
|
Node offset = Operation(OperationCode::UBitwiseAnd, real_address, std::move(mask));
|
||||||
|
offset = Operation(OperationCode::ULogicalShiftLeft, offset, Immediate(3));
|
||||||
|
|
||||||
|
value = Operation(OperationCode::UBitfieldInsert, gmem, std::move(value), offset,
|
||||||
|
Immediate(size));
|
||||||
|
}
|
||||||
|
|
||||||
bb.push_back(Operation(OperationCode::Assign, gmem, value));
|
bb.push_back(Operation(OperationCode::Assign, gmem, value));
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
@ -336,7 +357,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
|
||||||
|
|
||||||
std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackGlobalMemory(NodeBlock& bb,
|
std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackGlobalMemory(NodeBlock& bb,
|
||||||
Instruction instr,
|
Instruction instr,
|
||||||
bool is_write) {
|
bool is_read, bool is_write) {
|
||||||
const auto addr_register{GetRegister(instr.gmem.gpr)};
|
const auto addr_register{GetRegister(instr.gmem.gpr)};
|
||||||
const auto immediate_offset{static_cast<u32>(instr.gmem.offset)};
|
const auto immediate_offset{static_cast<u32>(instr.gmem.offset)};
|
||||||
|
|
||||||
|
@ -351,11 +372,8 @@ std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackGlobalMemory(NodeBlock&
|
||||||
const GlobalMemoryBase descriptor{index, offset};
|
const GlobalMemoryBase descriptor{index, offset};
|
||||||
const auto& [entry, is_new] = used_global_memory.try_emplace(descriptor);
|
const auto& [entry, is_new] = used_global_memory.try_emplace(descriptor);
|
||||||
auto& usage = entry->second;
|
auto& usage = entry->second;
|
||||||
if (is_write) {
|
usage.is_written |= is_write;
|
||||||
usage.is_written = true;
|
usage.is_read |= is_read;
|
||||||
} else {
|
|
||||||
usage.is_read = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
const auto real_address =
|
const auto real_address =
|
||||||
Operation(OperationCode::UAdd, NO_PRECISE, Immediate(immediate_offset), addr_register);
|
Operation(OperationCode::UAdd, NO_PRECISE, Immediate(immediate_offset), addr_register);
|
||||||
|
|
|
@ -394,7 +394,7 @@ private:
|
||||||
|
|
||||||
std::tuple<Node, Node, GlobalMemoryBase> TrackGlobalMemory(NodeBlock& bb,
|
std::tuple<Node, Node, GlobalMemoryBase> TrackGlobalMemory(NodeBlock& bb,
|
||||||
Tegra::Shader::Instruction instr,
|
Tegra::Shader::Instruction instr,
|
||||||
bool is_write);
|
bool is_read, bool is_write);
|
||||||
|
|
||||||
/// Register new amending code and obtain the reference id.
|
/// Register new amending code and obtain the reference id.
|
||||||
std::size_t DeclareAmend(Node new_amend);
|
std::size_t DeclareAmend(Node new_amend);
|
||||||
|
|
Loading…
Reference in New Issue