Refactor precise usage and add FMNMX, MUFU, FMUL32 and FADD32I
This commit is contained in:
parent
d8d557df86
commit
e5ca097e32
|
@ -341,10 +341,10 @@ public:
|
||||||
*/
|
*/
|
||||||
void SetRegisterToFloat(const Register& reg, u64 elem, const std::string& value,
|
void SetRegisterToFloat(const Register& reg, u64 elem, const std::string& value,
|
||||||
u64 dest_num_components, u64 value_num_components,
|
u64 dest_num_components, u64 value_num_components,
|
||||||
bool is_saturated = false, u64 dest_elem = 0) {
|
bool is_saturated = false, u64 dest_elem = 0, bool precise = false) {
|
||||||
|
|
||||||
SetRegister(reg, elem, is_saturated ? "clamp(" + value + ", 0.0, 1.0)" : value,
|
SetRegister(reg, elem, is_saturated ? "clamp(" + value + ", 0.0, 1.0)" : value,
|
||||||
dest_num_components, value_num_components, dest_elem);
|
dest_num_components, value_num_components, dest_elem, precise);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -368,7 +368,7 @@ public:
|
||||||
const std::string func{is_signed ? "intBitsToFloat" : "uintBitsToFloat"};
|
const std::string func{is_signed ? "intBitsToFloat" : "uintBitsToFloat"};
|
||||||
|
|
||||||
SetRegister(reg, elem, func + '(' + ConvertIntegerSize(value, size) + ')',
|
SetRegister(reg, elem, func + '(' + ConvertIntegerSize(value, size) + ')',
|
||||||
dest_num_components, value_num_components, dest_elem);
|
dest_num_components, value_num_components, dest_elem, false);
|
||||||
|
|
||||||
if (sets_cc) {
|
if (sets_cc) {
|
||||||
const std::string zero_condition = "( " + ConvertIntegerSize(value, size) + " == 0 )";
|
const std::string zero_condition = "( " + ConvertIntegerSize(value, size) + " == 0 )";
|
||||||
|
@ -416,7 +416,7 @@ public:
|
||||||
}
|
}
|
||||||
}();
|
}();
|
||||||
|
|
||||||
SetRegister(reg, elem, result, dest_num_components, value_num_components, dest_elem);
|
SetRegister(reg, elem, result, dest_num_components, value_num_components, dest_elem, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -757,7 +757,8 @@ private:
|
||||||
* @param dest_elem Optional, the destination element to use for the operation.
|
* @param dest_elem Optional, the destination element to use for the operation.
|
||||||
*/
|
*/
|
||||||
void SetRegister(const Register& reg, u64 elem, const std::string& value,
|
void SetRegister(const Register& reg, u64 elem, const std::string& value,
|
||||||
u64 dest_num_components, u64 value_num_components, u64 dest_elem) {
|
u64 dest_num_components, u64 value_num_components, u64 dest_elem,
|
||||||
|
bool precise) {
|
||||||
if (reg == Register::ZeroIndex) {
|
if (reg == Register::ZeroIndex) {
|
||||||
LOG_CRITICAL(HW_GPU, "Cannot set Register::ZeroIndex");
|
LOG_CRITICAL(HW_GPU, "Cannot set Register::ZeroIndex");
|
||||||
UNREACHABLE();
|
UNREACHABLE();
|
||||||
|
@ -774,7 +775,18 @@ private:
|
||||||
src += GetSwizzle(elem);
|
src += GetSwizzle(elem);
|
||||||
}
|
}
|
||||||
|
|
||||||
shader.AddLine(dest + " = " + src + ';');
|
if (precise && stage != Maxwell3D::Regs::ShaderStage::Fragment) {
|
||||||
|
shader.AddLine('{');
|
||||||
|
++shader.scope;
|
||||||
|
// This avoids optimizations of constant propagation and keeps the code as the original
|
||||||
|
// Sadly using the precise keyword causes "linking" errors on fragment shaders.
|
||||||
|
shader.AddLine("precise float tmp = " + src + ';');
|
||||||
|
shader.AddLine(dest + " = tmp;");
|
||||||
|
--shader.scope;
|
||||||
|
shader.AddLine('}');
|
||||||
|
} else {
|
||||||
|
shader.AddLine(dest + " = " + src + ';');
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Build the GLSL register list.
|
/// Build the GLSL register list.
|
||||||
|
@ -1511,24 +1523,8 @@ private:
|
||||||
|
|
||||||
op_b = GetOperandAbsNeg(op_b, false, instr.fmul.negate_b);
|
op_b = GetOperandAbsNeg(op_b, false, instr.fmul.negate_b);
|
||||||
|
|
||||||
shader.AddLine('{');
|
regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " * " + op_b, 1, 1,
|
||||||
++shader.scope;
|
instr.alu.saturate_d, 0, true);
|
||||||
|
|
||||||
// This avoids optimizations of constant propagation and keeps the code as the original
|
|
||||||
// Sadly using the precise keyword causes "linking" errors on fragment shaders.
|
|
||||||
if (stage == Maxwell3D::Regs::ShaderStage::Fragment) {
|
|
||||||
shader.AddLine("float tmp = " + op_a + " * " + op_b + ';');
|
|
||||||
} else {
|
|
||||||
shader.AddLine("precise float tmp = " + op_a + " * " + op_b + ';');
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
regs.SetRegisterToFloat(instr.gpr0, 0, "tmp", 1, 1,
|
|
||||||
instr.alu.saturate_d);
|
|
||||||
|
|
||||||
|
|
||||||
--shader.scope;
|
|
||||||
shader.AddLine('}');
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case OpCode::Id::FADD_C:
|
case OpCode::Id::FADD_C:
|
||||||
|
@ -1537,24 +1533,8 @@ private:
|
||||||
op_a = GetOperandAbsNeg(op_a, instr.alu.abs_a, instr.alu.negate_a);
|
op_a = GetOperandAbsNeg(op_a, instr.alu.abs_a, instr.alu.negate_a);
|
||||||
op_b = GetOperandAbsNeg(op_b, instr.alu.abs_b, instr.alu.negate_b);
|
op_b = GetOperandAbsNeg(op_b, instr.alu.abs_b, instr.alu.negate_b);
|
||||||
|
|
||||||
shader.AddLine('{');
|
regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " + " + op_b, 1, 1,
|
||||||
++shader.scope;
|
instr.alu.saturate_d, 0, true);
|
||||||
|
|
||||||
// This avoids optimizations of constant propagation and keeps the code as the original
|
|
||||||
// Sadly using the precise keyword causes "linking" errors on fragment shaders.
|
|
||||||
if (stage == Maxwell3D::Regs::ShaderStage::Fragment) {
|
|
||||||
shader.AddLine("float tmp = " + op_a + " + " + op_b + ';');
|
|
||||||
} else {
|
|
||||||
shader.AddLine("precise float tmp = " + op_a + " + " + op_b + ';');
|
|
||||||
}
|
|
||||||
regs.SetRegisterToFloat(instr.gpr0, 0, "tmp", 1, 1,
|
|
||||||
instr.alu.saturate_d);
|
|
||||||
|
|
||||||
|
|
||||||
--shader.scope;
|
|
||||||
shader.AddLine('}');
|
|
||||||
|
|
||||||
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case OpCode::Id::MUFU: {
|
case OpCode::Id::MUFU: {
|
||||||
|
@ -1562,31 +1542,31 @@ private:
|
||||||
switch (instr.sub_op) {
|
switch (instr.sub_op) {
|
||||||
case SubOp::Cos:
|
case SubOp::Cos:
|
||||||
regs.SetRegisterToFloat(instr.gpr0, 0, "cos(" + op_a + ')', 1, 1,
|
regs.SetRegisterToFloat(instr.gpr0, 0, "cos(" + op_a + ')', 1, 1,
|
||||||
instr.alu.saturate_d);
|
instr.alu.saturate_d, 0, true);
|
||||||
break;
|
break;
|
||||||
case SubOp::Sin:
|
case SubOp::Sin:
|
||||||
regs.SetRegisterToFloat(instr.gpr0, 0, "sin(" + op_a + ')', 1, 1,
|
regs.SetRegisterToFloat(instr.gpr0, 0, "sin(" + op_a + ')', 1, 1,
|
||||||
instr.alu.saturate_d);
|
instr.alu.saturate_d, 0, true);
|
||||||
break;
|
break;
|
||||||
case SubOp::Ex2:
|
case SubOp::Ex2:
|
||||||
regs.SetRegisterToFloat(instr.gpr0, 0, "exp2(" + op_a + ')', 1, 1,
|
regs.SetRegisterToFloat(instr.gpr0, 0, "exp2(" + op_a + ')', 1, 1,
|
||||||
instr.alu.saturate_d);
|
instr.alu.saturate_d, 0, true);
|
||||||
break;
|
break;
|
||||||
case SubOp::Lg2:
|
case SubOp::Lg2:
|
||||||
regs.SetRegisterToFloat(instr.gpr0, 0, "log2(" + op_a + ')', 1, 1,
|
regs.SetRegisterToFloat(instr.gpr0, 0, "log2(" + op_a + ')', 1, 1,
|
||||||
instr.alu.saturate_d);
|
instr.alu.saturate_d, 0, true);
|
||||||
break;
|
break;
|
||||||
case SubOp::Rcp:
|
case SubOp::Rcp:
|
||||||
regs.SetRegisterToFloat(instr.gpr0, 0, "1.0 / " + op_a, 1, 1,
|
regs.SetRegisterToFloat(instr.gpr0, 0, "1.0 / " + op_a, 1, 1,
|
||||||
instr.alu.saturate_d);
|
instr.alu.saturate_d, 0, true);
|
||||||
break;
|
break;
|
||||||
case SubOp::Rsq:
|
case SubOp::Rsq:
|
||||||
regs.SetRegisterToFloat(instr.gpr0, 0, "inversesqrt(" + op_a + ')', 1, 1,
|
regs.SetRegisterToFloat(instr.gpr0, 0, "inversesqrt(" + op_a + ')', 1, 1,
|
||||||
instr.alu.saturate_d);
|
instr.alu.saturate_d, 0, true);
|
||||||
break;
|
break;
|
||||||
case SubOp::Sqrt:
|
case SubOp::Sqrt:
|
||||||
regs.SetRegisterToFloat(instr.gpr0, 0, "sqrt(" + op_a + ')', 1, 1,
|
regs.SetRegisterToFloat(instr.gpr0, 0, "sqrt(" + op_a + ')', 1, 1,
|
||||||
instr.alu.saturate_d);
|
instr.alu.saturate_d, 0, true);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
LOG_CRITICAL(HW_GPU, "Unhandled MUFU sub op: {0:x}",
|
LOG_CRITICAL(HW_GPU, "Unhandled MUFU sub op: {0:x}",
|
||||||
|
@ -1607,7 +1587,7 @@ private:
|
||||||
regs.SetRegisterToFloat(instr.gpr0, 0,
|
regs.SetRegisterToFloat(instr.gpr0, 0,
|
||||||
'(' + condition + ") ? min(" + parameters + ") : max(" +
|
'(' + condition + ") ? min(" + parameters + ") : max(" +
|
||||||
parameters + ')',
|
parameters + ')',
|
||||||
1, 1);
|
1, 1, false, 0, true);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case OpCode::Id::RRO_C:
|
case OpCode::Id::RRO_C:
|
||||||
|
@ -1636,7 +1616,7 @@ private:
|
||||||
regs.SetRegisterToFloat(instr.gpr0, 0,
|
regs.SetRegisterToFloat(instr.gpr0, 0,
|
||||||
regs.GetRegisterAsFloat(instr.gpr8) + " * " +
|
regs.GetRegisterAsFloat(instr.gpr8) + " * " +
|
||||||
GetImmediate32(instr),
|
GetImmediate32(instr),
|
||||||
1, 1, instr.fmul32.saturate);
|
1, 1, instr.fmul32.saturate, 0, true);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case OpCode::Id::FADD32I: {
|
case OpCode::Id::FADD32I: {
|
||||||
|
@ -1659,7 +1639,7 @@ private:
|
||||||
op_b = "-(" + op_b + ')';
|
op_b = "-(" + op_b + ')';
|
||||||
}
|
}
|
||||||
|
|
||||||
regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " + " + op_b, 1, 1);
|
regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " + " + op_b, 1, 1, false, 0, true);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -2121,23 +2101,9 @@ private:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
shader.AddLine('{');
|
regs.SetRegisterToFloat(instr.gpr0, 0, "fma(" + op_a + ", " + op_b + ", " + op_c + ')',
|
||||||
++shader.scope;
|
1, 1, instr.alu.saturate_d, 0, true);
|
||||||
|
|
||||||
// This avoids optimizations of constant propagation and keeps the code as the original
|
|
||||||
// Sadly using the precise keyword causes "linking" errors on fragment shaders.
|
|
||||||
if (stage == Maxwell3D::Regs::ShaderStage::Fragment) {
|
|
||||||
shader.AddLine("float tmp = fma(" + op_a + ", " + op_b + ", " + op_c + ");");
|
|
||||||
} else {
|
|
||||||
shader.AddLine("precise float tmp = fma(" + op_a + ", " + op_b + ", " + op_c + ");");
|
|
||||||
}
|
|
||||||
|
|
||||||
regs.SetRegisterToFloat(instr.gpr0, 0, "tmp", 1, 1,
|
|
||||||
instr.alu.saturate_d);
|
|
||||||
|
|
||||||
|
|
||||||
--shader.scope;
|
|
||||||
shader.AddLine('}');
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case OpCode::Type::Hfma2: {
|
case OpCode::Type::Hfma2: {
|
||||||
|
|
|
@ -15,8 +15,7 @@ static constexpr u32 PROGRAM_OFFSET{10};
|
||||||
|
|
||||||
ProgramResult GenerateVertexShader(const ShaderSetup& setup) {
|
ProgramResult GenerateVertexShader(const ShaderSetup& setup) {
|
||||||
std::string out = "#version 430 core\n";
|
std::string out = "#version 430 core\n";
|
||||||
out += "#extension GL_ARB_separate_shader_objects : enable\n";
|
out += "#extension GL_ARB_separate_shader_objects : enable\n\n";
|
||||||
out += "#extension GL_ARB_gpu_shader5 : enable\n\n";
|
|
||||||
out += Decompiler::GetCommonDeclarations();
|
out += Decompiler::GetCommonDeclarations();
|
||||||
|
|
||||||
out += R"(
|
out += R"(
|
||||||
|
@ -84,8 +83,7 @@ void main() {
|
||||||
|
|
||||||
ProgramResult GenerateGeometryShader(const ShaderSetup& setup) {
|
ProgramResult GenerateGeometryShader(const ShaderSetup& setup) {
|
||||||
std::string out = "#version 430 core\n";
|
std::string out = "#version 430 core\n";
|
||||||
out += "#extension GL_ARB_separate_shader_objects : enable\n";
|
out += "#extension GL_ARB_separate_shader_objects : enable\n\n";
|
||||||
out += "#extension GL_ARB_gpu_shader5 : enable\n\n";
|
|
||||||
out += Decompiler::GetCommonDeclarations();
|
out += Decompiler::GetCommonDeclarations();
|
||||||
out += "bool exec_geometry();\n";
|
out += "bool exec_geometry();\n";
|
||||||
|
|
||||||
|
@ -119,8 +117,7 @@ void main() {
|
||||||
|
|
||||||
ProgramResult GenerateFragmentShader(const ShaderSetup& setup) {
|
ProgramResult GenerateFragmentShader(const ShaderSetup& setup) {
|
||||||
std::string out = "#version 430 core\n";
|
std::string out = "#version 430 core\n";
|
||||||
out += "#extension GL_ARB_separate_shader_objects : enable\n";
|
out += "#extension GL_ARB_separate_shader_objects : enable\n\n";
|
||||||
out += "#extension GL_ARB_gpu_shader5 : enable\n\n";
|
|
||||||
out += Decompiler::GetCommonDeclarations();
|
out += Decompiler::GetCommonDeclarations();
|
||||||
out += "bool exec_fragment();\n";
|
out += "bool exec_fragment();\n";
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue