diff --git a/src/shader_recompiler/backend/glasm/emit_glasm.cpp b/src/shader_recompiler/backend/glasm/emit_glasm.cpp index 2ce839059..4aa3682c2 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm.cpp @@ -203,7 +203,13 @@ void Precolor(EmitContext& ctx, const IR::Program& program) { for (size_t i = 0; i < num_args; ++i) { IR::Block& phi_block{*phi.PhiBlock(i)}; auto it{std::find_if_not(phi_block.rbegin(), phi_block.rend(), IsReference).base()}; - IR::IREmitter{phi_block, it}.PhiMove(phi, phi.Arg(i)); + IR::IREmitter ir{phi_block, it}; + const IR::Value arg{phi.Arg(i)}; + if (arg.IsImmediate()) { + ir.PhiMove(phi, arg); + } else { + ir.PhiMove(phi, IR::Value{&RegAlloc::AliasInst(*arg.Inst())}); + } } for (size_t i = 0; i < num_args; ++i) { IR::IREmitter{*phi.PhiBlock(i)}.Reference(IR::Value{&phi}); diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp index 808c72105..9201ccd39 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_bitwise_conversion.cpp @@ -23,7 +23,13 @@ void EmitIdentity(EmitContext&, IR::Inst& inst, const IR::Value& value) { } void EmitConditionRef(EmitContext& ctx, IR::Inst& inst, const IR::Value& value) { - ctx.Add("MOV.S {},{};", inst, ScalarS32{ctx.reg_alloc.Consume(value)}); + // Fake one usage to get a real register out of the condition + inst.DestructiveAddUsage(1); + const Register ret{ctx.reg_alloc.Define(inst)}; + const ScalarS32 input{ctx.reg_alloc.Consume(value)}; + if (ret != input) { + ctx.Add("MOV.S {},{};", ret, input); + } } void EmitBitCastU16F16(EmitContext&, IR::Inst& inst, const IR::Value& value) { diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_composite.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_composite.cpp index d829f05b3..bff0b7c1c 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_composite.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_composite.cpp @@ -52,7 +52,9 @@ void CompositeInsert(EmitContext& ctx, IR::Inst& inst, Register composite, Objec // The input composite is not aliased with the return value so we have to copy it before // hand. But the insert object is not aliased with the return value, so we don't have to // worry about that - ctx.Add("MOV.{} {},{};MOV.{} {}.{},{};", type, ret, composite, type, ret, swizzle, object); + ctx.Add("MOV.{} {},{};" + "MOV.{} {}.{},{};", + type, ret, composite, type, ret, swizzle, object); } else { // The return value is alised so we can just insert the object, it doesn't matter if it's // aliased diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp index a7def0897..34725b8c6 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_image.cpp @@ -181,7 +181,6 @@ void StoreSparse(EmitContext& ctx, IR::Inst* sparse_inst) { ctx.Add("MOV.S {},-1;" "MOV.S {}(NONRESIDENT),0;", sparse_ret, sparse_ret); - sparse_inst->Invalidate(); } std::string_view FormatStorage(ImageFormat format) { @@ -215,12 +214,20 @@ void ImageAtomic(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Regis const Register ret{ctx.reg_alloc.Define(inst)}; ctx.Add("ATOMIM.{} {},{},{},{},{};", op, ret, value, coord, image, type); } + +IR::Inst* PrepareSparse(IR::Inst& inst) { + const auto sparse_inst{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)}; + if (sparse_inst) { + sparse_inst->Invalidate(); + } + return sparse_inst; +} } // Anonymous namespace void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, const IR::Value& coord, Register bias_lc, const IR::Value& offset) { const auto info{inst.Flags()}; - const auto sparse_inst{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)}; + const auto sparse_inst{PrepareSparse(inst)}; const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""}; const std::string_view lod_clamp_mod{info.has_lod_clamp ? ".LODCLAMP" : ""}; const std::string_view type{TextureType(info)}; @@ -259,7 +266,7 @@ void EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Valu void EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, const IR::Value& coord, ScalarF32 lod, const IR::Value& offset) { const auto info{inst.Flags()}; - const auto sparse_inst{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)}; + const auto sparse_inst{PrepareSparse(inst)}; const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""}; const std::string_view type{TextureType(info)}; const std::string texture{Texture(ctx, info, index)}; @@ -288,7 +295,7 @@ void EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst& inst, const IR:: } const ScalarF32 dref_val{ctx.reg_alloc.Consume(dref)}; const Register bias_lc_vec{ctx.reg_alloc.Consume(bias_lc)}; - const auto sparse_inst{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)}; + const auto sparse_inst{PrepareSparse(inst)}; const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""}; const std::string_view type{TextureType(info)}; const std::string texture{Texture(ctx, info, index)}; @@ -393,7 +400,7 @@ void EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst& inst, const IR:: } const ScalarF32 dref_val{ctx.reg_alloc.Consume(dref)}; const ScalarF32 lod_val{ctx.reg_alloc.Consume(lod)}; - const auto sparse_inst{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)}; + const auto sparse_inst{PrepareSparse(inst)}; const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""}; const std::string_view type{TextureType(info)}; const std::string texture{Texture(ctx, info, index)}; @@ -436,7 +443,7 @@ void EmitImageGather(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, const auto [off_x, off_y]{AllocOffsetsRegs(ctx, offset2)}; const auto info{inst.Flags()}; const char comp{"xyzw"[info.gather_component]}; - const auto sparse_inst{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)}; + const auto sparse_inst{PrepareSparse(inst)}; const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""}; const std::string_view type{TextureType(info)}; const std::string texture{Texture(ctx, info, index)}; @@ -462,7 +469,7 @@ void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& inde // Allocate offsets early so they don't overwrite any consumed register const auto [off_x, off_y]{AllocOffsetsRegs(ctx, offset2)}; const auto info{inst.Flags()}; - const auto sparse_inst{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)}; + const auto sparse_inst{PrepareSparse(inst)}; const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""}; const std::string_view type{TextureType(info)}; const std::string texture{Texture(ctx, info, index)}; @@ -500,7 +507,7 @@ void EmitImageGatherDref(EmitContext& ctx, IR::Inst& inst, const IR::Value& inde void EmitImageFetch(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, const IR::Value& coord, const IR::Value& offset, ScalarS32 lod, ScalarS32 ms) { const auto info{inst.Flags()}; - const auto sparse_inst{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)}; + const auto sparse_inst{PrepareSparse(inst)}; const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""}; const std::string_view type{TextureType(info)}; const std::string texture{Texture(ctx, info, index)}; @@ -547,7 +554,7 @@ void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, dpdx = ScopedRegister{ctx.reg_alloc}; dpdy = ScopedRegister{ctx.reg_alloc}; } - const auto sparse_inst{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)}; + const auto sparse_inst{PrepareSparse(inst)}; const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""}; const std::string_view type{TextureType(info)}; const std::string texture{Texture(ctx, info, index)}; @@ -581,7 +588,7 @@ void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, void EmitImageRead(EmitContext& ctx, IR::Inst& inst, const IR::Value& index, Register coord) { const auto info{inst.Flags()}; - const auto sparse_inst{inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)}; + const auto sparse_inst{PrepareSparse(inst)}; const std::string_view format{FormatStorage(info.image_format)}; const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""}; const std::string_view type{TextureType(info)}; diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp index e5aac14c8..e9d1bae05 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_integer.cpp @@ -9,6 +9,17 @@ namespace Shader::Backend::GLASM { void EmitIAdd32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) { + const std::array flags{ + inst.GetAssociatedPseudoOperation(IR::Opcode::GetZeroFromOp), + inst.GetAssociatedPseudoOperation(IR::Opcode::GetSignFromOp), + inst.GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp), + inst.GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp), + }; + for (IR::Inst* const flag_inst : flags) { + if (flag_inst) { + flag_inst->Invalidate(); + } + } const bool cc{inst.HasAssociatedPseudoOperation()}; const std::string_view cc_mod{cc ? ".CC" : ""}; if (cc) { @@ -19,20 +30,22 @@ void EmitIAdd32(EmitContext& ctx, IR::Inst& inst, ScalarS32 a, ScalarS32 b) { if (!cc) { return; } - static constexpr std::array masks{"EQ", "SF", "CF", "OF"}; - const std::array flags{ - inst.GetAssociatedPseudoOperation(IR::Opcode::GetZeroFromOp), - inst.GetAssociatedPseudoOperation(IR::Opcode::GetSignFromOp), - inst.GetAssociatedPseudoOperation(IR::Opcode::GetCarryFromOp), - inst.GetAssociatedPseudoOperation(IR::Opcode::GetOverflowFromOp), - }; - for (size_t i = 0; i < flags.size(); ++i) { - if (flags[i]) { - const auto flag_ret{ctx.reg_alloc.Define(*flags[i])}; - ctx.Add("MOV.S {},0;" - "MOV.S {}({}.x),-1;", - flag_ret, flag_ret, masks[i]); - flags[i]->Invalidate(); + static constexpr std::array masks{"", "SF", "CF", "OF"}; + for (size_t flag_index = 0; flag_index < flags.size(); ++flag_index) { + if (!flags[flag_index]) { + continue; + } + const auto flag_ret{ctx.reg_alloc.Define(*flags[flag_index])}; + if (flag_index == 0) { + ctx.Add("SEQ.S {}.x,{}.x,0;", flag_ret, ret); + } else { + // We could use conditional execution here, but it's broken on Nvidia's compiler + ctx.Add("IF {}.x;" + "MOV.S {}.x,-1;" + "ELSE;" + "MOV.S {}.x,0;" + "ENDIF;", + masks[flag_index], flag_ret, flag_ret); } } } @@ -136,6 +149,17 @@ void EmitBitFieldSExtract(EmitContext& ctx, IR::Inst& inst, ScalarS32 base, Scal void EmitBitFieldUExtract(EmitContext& ctx, IR::Inst& inst, ScalarU32 base, ScalarU32 offset, ScalarU32 count) { + const auto zero = inst.GetAssociatedPseudoOperation(IR::Opcode::GetZeroFromOp); + const auto sign = inst.GetAssociatedPseudoOperation(IR::Opcode::GetSignFromOp); + if (zero) { + zero->Invalidate(); + } + if (sign) { + sign->Invalidate(); + } + if (zero || sign) { + ctx.reg_alloc.InvalidateConditionCodes(); + } const Register ret{ctx.reg_alloc.Define(inst)}; if (count.type != Type::Register && offset.type != Type::Register) { ctx.Add("BFE.U {},{{{},{},0,0}},{};", ret, count, offset, base); @@ -145,13 +169,11 @@ void EmitBitFieldUExtract(EmitContext& ctx, IR::Inst& inst, ScalarU32 base, Scal "BFE.U {},RC,{};", count, offset, ret, base); } - if (const auto zero = inst.GetAssociatedPseudoOperation(IR::Opcode::GetZeroFromOp)) { + if (zero) { ctx.Add("SEQ.S {},{},0;", *zero, ret); - zero->Invalidate(); } - if (const auto sign = inst.GetAssociatedPseudoOperation(IR::Opcode::GetSignFromOp)) { + if (sign) { ctx.Add("SLT.S {},{},0;", *sign, ret); - sign->Invalidate(); } } diff --git a/src/shader_recompiler/backend/glasm/emit_glasm_warp.cpp b/src/shader_recompiler/backend/glasm/emit_glasm_warp.cpp index af0e13d43..6e30790bb 100644 --- a/src/shader_recompiler/backend/glasm/emit_glasm_warp.cpp +++ b/src/shader_recompiler/backend/glasm/emit_glasm_warp.cpp @@ -51,6 +51,10 @@ void EmitSubgroupGeMask(EmitContext& ctx, IR::Inst& inst) { static void Shuffle(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 index, const IR::Value& clamp, const IR::Value& segmentation_mask, std::string_view op) { + IR::Inst* const in_bounds{inst.GetAssociatedPseudoOperation(IR::Opcode::GetInBoundsFromOp)}; + if (in_bounds) { + in_bounds->Invalidate(); + } std::string mask; if (clamp.IsImmediate() && segmentation_mask.IsImmediate()) { mask = fmt::to_string(clamp.U32() | (segmentation_mask.U32() << 8)); @@ -61,13 +65,11 @@ static void Shuffle(EmitContext& ctx, IR::Inst& inst, ScalarU32 value, ScalarU32 ScalarU32{ctx.reg_alloc.Consume(clamp)}); } const Register value_ret{ctx.reg_alloc.Define(inst)}; - IR::Inst* const in_bounds{inst.GetAssociatedPseudoOperation(IR::Opcode::GetInBoundsFromOp)}; if (in_bounds) { const Register bounds_ret{ctx.reg_alloc.Define(*in_bounds)}; ctx.Add("SHF{}.U {},{},{},{};" "MOV.U {}.x,{}.y;", op, bounds_ret, value, index, mask, value_ret, bounds_ret); - in_bounds->Invalidate(); } else { ctx.Add("SHF{}.U {},{},{},{};" "MOV.U {}.x,{}.y;", diff --git a/src/shader_recompiler/backend/glasm/reg_alloc.cpp b/src/shader_recompiler/backend/glasm/reg_alloc.cpp index 707b22247..c55a833c6 100644 --- a/src/shader_recompiler/backend/glasm/reg_alloc.cpp +++ b/src/shader_recompiler/backend/glasm/reg_alloc.cpp @@ -22,11 +22,19 @@ Register RegAlloc::LongDefine(IR::Inst& inst) { } Value RegAlloc::Peek(const IR::Value& value) { - return value.IsImmediate() ? MakeImm(value) : PeekInst(*value.InstRecursive()); + if (value.IsImmediate()) { + return MakeImm(value); + } else { + return PeekInst(*value.Inst()); + } } Value RegAlloc::Consume(const IR::Value& value) { - return value.IsImmediate() ? MakeImm(value) : ConsumeInst(*value.InstRecursive()); + if (value.IsImmediate()) { + return MakeImm(value); + } else { + return ConsumeInst(*value.Inst()); + } } void RegAlloc::Unref(IR::Inst& inst) { @@ -88,7 +96,14 @@ Value RegAlloc::MakeImm(const IR::Value& value) { } Register RegAlloc::Define(IR::Inst& inst, bool is_long) { - inst.SetDefinition(Alloc(is_long)); + if (inst.HasUses()) { + inst.SetDefinition(Alloc(is_long)); + } else { + Id id{}; + id.is_long.Assign(is_long ? 1 : 0); + id.is_null.Assign(1); + inst.SetDefinition(id); + } return Register{PeekInst(inst)}; } @@ -115,10 +130,12 @@ Id RegAlloc::Alloc(bool is_long) { num_regs = std::max(num_regs, reg + 1); use[reg] = true; Id ret{}; - ret.index.Assign(static_cast(reg)); + ret.is_valid.Assign(1); ret.is_long.Assign(is_long ? 1 : 0); ret.is_spill.Assign(0); ret.is_condition_code.Assign(0); + ret.is_null.Assign(0); + ret.index.Assign(static_cast(reg)); return ret; } } @@ -126,6 +143,9 @@ Id RegAlloc::Alloc(bool is_long) { } void RegAlloc::Free(Id id) { + if (id.is_valid == 0) { + throw LogicError("Freeing invalid register"); + } if (id.is_spill != 0) { throw NotImplementedException("Free spill"); } diff --git a/src/shader_recompiler/backend/glasm/reg_alloc.h b/src/shader_recompiler/backend/glasm/reg_alloc.h index 41b7c92be..b97c84146 100644 --- a/src/shader_recompiler/backend/glasm/reg_alloc.h +++ b/src/shader_recompiler/backend/glasm/reg_alloc.h @@ -35,10 +35,12 @@ enum class Type : u32 { struct Id { union { u32 raw; - BitField<0, 29, u32> index; - BitField<29, 1, u32> is_long; - BitField<30, 1, u32> is_spill; - BitField<31, 1, u32> is_condition_code; + BitField<0, 1, u32> is_valid; + BitField<1, 1, u32> is_long; + BitField<2, 1, u32> is_spill; + BitField<3, 1, u32> is_condition_code; + BitField<4, 1, u32> is_null; + BitField<5, 27, u32> index; }; bool operator==(Id rhs) const noexcept { @@ -164,12 +166,18 @@ auto FormatTo(FormatContext& ctx, Id id) { throw NotImplementedException("Spill emission"); } if constexpr (scalar) { + if (id.is_null != 0) { + return fmt::format_to(ctx.out(), "{}", id.is_long != 0 ? "DC.x" : "RC.x"); + } if (id.is_long != 0) { return fmt::format_to(ctx.out(), "D{}.x", id.index.Value()); } else { return fmt::format_to(ctx.out(), "R{}.x", id.index.Value()); } } else { + if (id.is_null != 0) { + return fmt::format_to(ctx.out(), "{}", id.is_long != 0 ? "DC" : "RC"); + } if (id.is_long != 0) { return fmt::format_to(ctx.out(), "D{}", id.index.Value()); } else {