[llvm] [CodeGen] Do not use subsituteRegister to update implicit def (PR #148068)
Peiming Liu via llvm-commits
llvm-commits at lists.llvm.org
Fri Jul 11 08:59:50 PDT 2025
https://github.com/PeimingLiu updated https://github.com/llvm/llvm-project/pull/148068
>From db3674ad0e538e106788331297e3b96d1766cd08 Mon Sep 17 00:00:00 2001
From: Peiming Liu <peiming at modular.com>
Date: Thu, 10 Jul 2025 15:02:07 -0700
Subject: [PATCH 1/3] [CodeGen] Do not use subsituteRegister to update implicit
def
---
llvm/lib/CodeGen/TargetInstrInfo.cpp | 29 ++++++++++++++++++++--------
1 file changed, 21 insertions(+), 8 deletions(-)
diff --git a/llvm/lib/CodeGen/TargetInstrInfo.cpp b/llvm/lib/CodeGen/TargetInstrInfo.cpp
index 660a1a4d7ec47..ce4234bf8a007 100644
--- a/llvm/lib/CodeGen/TargetInstrInfo.cpp
+++ b/llvm/lib/CodeGen/TargetInstrInfo.cpp
@@ -228,6 +228,24 @@ MachineInstr *TargetInstrInfo::commuteInstructionImpl(MachineInstr &MI,
SubReg0 = SubReg1;
}
+ // For a case like this:
+ // %0.sub = INST %0.sub(tied), %1.sub, implicit-def %0
+ // we need to update the implicit-def after commuting to result in:
+ // %1.sub = INST %1.sub(tied), %0.sub, implicit-def %1
+ SmallVector<unsigned> UpdateImplicitDefIdx;
+ if (HasDef && MI.hasImplicitDef() && MI.getOperand(0).getReg() != Reg0) {
+ const TargetRegisterInfo *TRI =
+ MI.getMF()->getSubtarget().getRegisterInfo();
+ Register OrigReg0 = MI.getOperand(0).getReg();
+ for (auto [OpNo, MO] : llvm::enumerate(MI.implicit_operands())) {
+ Register ImplReg = MO.getReg();
+ if ((ImplReg.isVirtual() && ImplReg == OrigReg0) ||
+ (ImplReg.isPhysical() && OrigReg0.isPhysical() &&
+ TRI->isSubRegisterEq(ImplReg, OrigReg0)))
+ UpdateImplicitDefIdx.push_back(OpNo + MI.getNumExplicitOperands());
+ }
+ }
+
MachineInstr *CommutedMI = nullptr;
if (NewMI) {
// Create a new instruction.
@@ -238,15 +256,10 @@ MachineInstr *TargetInstrInfo::commuteInstructionImpl(MachineInstr &MI,
}
if (HasDef) {
- // Use `substituteRegister` so that for a case like this:
- // %0.sub = INST %0.sub(tied), %1.sub, implicit-def %0
- // the implicit-def is also updated, to result in:
- // %1.sub = INST %1.sub(tied), %0.sub, implicit-def %1
- const TargetRegisterInfo &TRI =
- *MI.getMF()->getSubtarget().getRegisterInfo();
- Register FromReg = CommutedMI->getOperand(0).getReg();
- CommutedMI->substituteRegister(FromReg, Reg0, /*SubRegIdx=*/0, TRI);
+ CommutedMI->getOperand(0).setReg(Reg0);
CommutedMI->getOperand(0).setSubReg(SubReg0);
+ for (unsigned Idx : UpdateImplicitDefIdx)
+ CommutedMI->getOperand(Idx).setReg(Reg0);
}
CommutedMI->getOperand(Idx2).setReg(Reg1);
CommutedMI->getOperand(Idx1).setReg(Reg2);
>From 7a347d8410ed566e87f169ec363325fbca99f817 Mon Sep 17 00:00:00 2001
From: Peiming Liu <peiming at modular.com>
Date: Thu, 10 Jul 2025 19:10:36 -0700
Subject: [PATCH 2/3] add test
---
.../X86/coalesce-commutative-implicit-def.mir | 21 +++++++++++++++++++
1 file changed, 21 insertions(+)
diff --git a/llvm/test/CodeGen/X86/coalesce-commutative-implicit-def.mir b/llvm/test/CodeGen/X86/coalesce-commutative-implicit-def.mir
index fe1235fe94f85..1f38430f631cc 100644
--- a/llvm/test/CodeGen/X86/coalesce-commutative-implicit-def.mir
+++ b/llvm/test/CodeGen/X86/coalesce-commutative-implicit-def.mir
@@ -35,3 +35,24 @@ body: |
%0:gr64_with_sub_8bit = COPY %1:gr64_with_sub_8bit
RET 0, implicit %0
...
+# Commuting instruction with 3 ops is handled correctly.
+---
+name: commuting_3_ops
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $ymm0, $ymm1
+
+ ; CHECK-LABEL: name: commuting_3_ops
+ ; CHECK: liveins: $ymm0, $ymm1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vr256 = COPY $ymm1
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vr256 = COPY $ymm0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vr256 = contract nofpexcept VFMADD213PSYr [[COPY1]], [[COPY]], [[COPY]], implicit $mxcsr
+ ; CHECK-NEXT: RET 0, implicit [[COPY1]]
+ %0:vr256 = COPY $ymm1
+ %1:vr256 = COPY $ymm0
+ %0:vr256 = contract nofpexcept VFMADD231PSYr %0:vr256, %0:vr256, %1:vr256, implicit $mxcsr
+ %1:vr256 = COPY %0:vr256
+ RET 0, implicit %1
+...
>From b6740c7d1556548f6a795b3711890c2abe6da0d8 Mon Sep 17 00:00:00 2001
From: Peiming Liu <peiming at modular.com>
Date: Fri, 11 Jul 2025 08:59:37 -0700
Subject: [PATCH 3/3] address comment
---
llvm/lib/CodeGen/TargetInstrInfo.cpp | 126 +++++++++++++--------------
1 file changed, 63 insertions(+), 63 deletions(-)
diff --git a/llvm/lib/CodeGen/TargetInstrInfo.cpp b/llvm/lib/CodeGen/TargetInstrInfo.cpp
index ce4234bf8a007..badb1c43dd6ad 100644
--- a/llvm/lib/CodeGen/TargetInstrInfo.cpp
+++ b/llvm/lib/CodeGen/TargetInstrInfo.cpp
@@ -40,8 +40,8 @@
using namespace llvm;
static cl::opt<bool> DisableHazardRecognizer(
- "disable-sched-hazard", cl::Hidden, cl::init(false),
- cl::desc("Disable hazard detection during preRA scheduling"));
+ "disable-sched-hazard", cl::Hidden, cl::init(false),
+ cl::desc("Disable hazard detection during preRA scheduling"));
static cl::opt<bool> EnableAccReassociation(
"acc-reassoc", cl::Hidden, cl::init(true),
@@ -58,7 +58,7 @@ static cl::opt<unsigned int> MaxAccumulatorWidth(
TargetInstrInfo::~TargetInstrInfo() = default;
-const TargetRegisterClass*
+const TargetRegisterClass *
TargetInstrInfo::getRegClass(const MCInstrDesc &MCID, unsigned OpNum,
const TargetRegisterInfo *TRI,
const MachineFunction &MF) const {
@@ -112,9 +112,9 @@ static bool isAsmComment(const char *Str, const MCAsmInfo &MAI) {
/// simple--i.e. not a logical or arithmetic expression--size values without
/// the optional fill value. This is primarily used for creating arbitrary
/// sized inline asm blocks for testing purposes.
-unsigned TargetInstrInfo::getInlineAsmLength(
- const char *Str,
- const MCAsmInfo &MAI, const TargetSubtargetInfo *STI) const {
+unsigned
+TargetInstrInfo::getInlineAsmLength(const char *Str, const MCAsmInfo &MAI,
+ const TargetSubtargetInfo *STI) const {
// Count the number of instructions in the asm.
bool AtInsnStart = true;
unsigned Length = 0;
@@ -152,9 +152,8 @@ unsigned TargetInstrInfo::getInlineAsmLength(
/// ReplaceTailWithBranchTo - Delete the instruction OldInst and everything
/// after it, replacing it with an unconditional branch to NewDest.
-void
-TargetInstrInfo::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail,
- MachineBasicBlock *NewDest) const {
+void TargetInstrInfo::ReplaceTailWithBranchTo(
+ MachineBasicBlock::iterator Tail, MachineBasicBlock *NewDest) const {
MachineBasicBlock *MBB = Tail->getParent();
// Remove all the old successors of MBB from the CFG.
@@ -188,8 +187,10 @@ MachineInstr *TargetInstrInfo::commuteInstructionImpl(MachineInstr &MI,
// No idea how to commute this instruction. Target should implement its own.
return nullptr;
- unsigned CommutableOpIdx1 = Idx1; (void)CommutableOpIdx1;
- unsigned CommutableOpIdx2 = Idx2; (void)CommutableOpIdx2;
+ unsigned CommutableOpIdx1 = Idx1;
+ (void)CommutableOpIdx1;
+ unsigned CommutableOpIdx2 = Idx2;
+ (void)CommutableOpIdx2;
assert(findCommutedOpIndices(MI, CommutableOpIdx1, CommutableOpIdx2) &&
CommutableOpIdx1 == Idx1 && CommutableOpIdx2 == Idx2 &&
"TargetInstrInfo::CommuteInstructionImpl(): not commutable operands.");
@@ -214,38 +215,38 @@ MachineInstr *TargetInstrInfo::commuteInstructionImpl(MachineInstr &MI,
Reg1.isPhysical() ? MI.getOperand(Idx1).isRenamable() : false;
bool Reg2IsRenamable =
Reg2.isPhysical() ? MI.getOperand(Idx2).isRenamable() : false;
- // If destination is tied to either of the commuted source register, then
- // it must be updated.
- if (HasDef && Reg0 == Reg1 &&
- MI.getDesc().getOperandConstraint(Idx1, MCOI::TIED_TO) == 0) {
- Reg2IsKill = false;
- Reg0 = Reg2;
- SubReg0 = SubReg2;
- } else if (HasDef && Reg0 == Reg2 &&
- MI.getDesc().getOperandConstraint(Idx2, MCOI::TIED_TO) == 0) {
- Reg1IsKill = false;
- Reg0 = Reg1;
- SubReg0 = SubReg1;
- }
// For a case like this:
// %0.sub = INST %0.sub(tied), %1.sub, implicit-def %0
// we need to update the implicit-def after commuting to result in:
// %1.sub = INST %1.sub(tied), %0.sub, implicit-def %1
SmallVector<unsigned> UpdateImplicitDefIdx;
- if (HasDef && MI.hasImplicitDef() && MI.getOperand(0).getReg() != Reg0) {
+ if (HasDef && MI.hasImplicitDef() && MI.getOperand(0).getReg() == Reg0) {
const TargetRegisterInfo *TRI =
MI.getMF()->getSubtarget().getRegisterInfo();
- Register OrigReg0 = MI.getOperand(0).getReg();
for (auto [OpNo, MO] : llvm::enumerate(MI.implicit_operands())) {
Register ImplReg = MO.getReg();
- if ((ImplReg.isVirtual() && ImplReg == OrigReg0) ||
- (ImplReg.isPhysical() && OrigReg0.isPhysical() &&
- TRI->isSubRegisterEq(ImplReg, OrigReg0)))
+ if ((ImplReg.isVirtual() && ImplReg == Reg0) ||
+ (ImplReg.isPhysical() && Reg0.isPhysical() &&
+ TRI->isSubRegisterEq(ImplReg, Reg0)))
UpdateImplicitDefIdx.push_back(OpNo + MI.getNumExplicitOperands());
}
}
+ // If destination is tied to either of the commuted source register, then
+ // it must be updated.
+ if (HasDef && Reg0 == Reg1 &&
+ MI.getDesc().getOperandConstraint(Idx1, MCOI::TIED_TO) == 0) {
+ Reg2IsKill = false;
+ Reg0 = Reg2;
+ SubReg0 = SubReg2;
+ } else if (HasDef && Reg0 == Reg2 &&
+ MI.getDesc().getOperandConstraint(Idx2, MCOI::TIED_TO) == 0) {
+ Reg1IsKill = false;
+ Reg0 = Reg1;
+ SubReg0 = SubReg1;
+ }
+
MachineInstr *CommutedMI = nullptr;
if (NewMI) {
// Create a new instruction.
@@ -340,8 +341,8 @@ bool TargetInstrInfo::findCommutedOpIndices(const MachineInstr &MI,
// is not true, then the target must implement this.
unsigned CommutableOpIdx1 = MCID.getNumDefs();
unsigned CommutableOpIdx2 = CommutableOpIdx1 + 1;
- if (!fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2,
- CommutableOpIdx1, CommutableOpIdx2))
+ if (!fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, CommutableOpIdx1,
+ CommutableOpIdx2))
return false;
if (!MI.getOperand(SrcOpIdx1).isReg() || !MI.getOperand(SrcOpIdx2).isReg())
@@ -351,7 +352,8 @@ bool TargetInstrInfo::findCommutedOpIndices(const MachineInstr &MI,
}
bool TargetInstrInfo::isUnpredicatedTerminator(const MachineInstr &MI) const {
- if (!MI.isTerminator()) return false;
+ if (!MI.isTerminator())
+ return false;
// Conditional branch is a special case.
if (MI.isBranch() && !MI.isBarrier())
@@ -488,7 +490,7 @@ static const TargetRegisterClass *canFoldCopy(const MachineInstr &MI,
assert(TII.isCopyInstr(MI) && "MI must be a COPY instruction");
if (MI.getNumOperands() != 2)
return nullptr;
- assert(FoldIdx<2 && "FoldIdx refers no nonexistent operand");
+ assert(FoldIdx < 2 && "FoldIdx refers no nonexistent operand");
const MachineOperand &FoldOp = MI.getOperand(FoldIdx);
const MachineOperand &LiveOp = MI.getOperand(1 - FoldIdx);
@@ -617,8 +619,7 @@ static MachineInstr *foldPatchpoint(MachineFunction &MF, MachineInstr &MI,
unsigned SpillSize;
unsigned SpillOffset;
// Compute the spill slot size and offset.
- const TargetRegisterClass *RC =
- MF.getRegInfo().getRegClass(MO.getReg());
+ const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(MO.getReg());
bool Valid =
TII.getStackSlotRange(RC, MO.getSubReg(), SpillSize, SpillOffset, MF);
if (!Valid)
@@ -763,11 +764,9 @@ MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineInstr &MI,
if (NewMI) {
NewMI->setMemRefs(MF, MI.memoperands());
// Add a memory operand, foldMemoryOperandImpl doesn't do that.
- assert((!(Flags & MachineMemOperand::MOStore) ||
- NewMI->mayStore()) &&
+ assert((!(Flags & MachineMemOperand::MOStore) || NewMI->mayStore()) &&
"Folded a def to a non-store!");
- assert((!(Flags & MachineMemOperand::MOLoad) ||
- NewMI->mayLoad()) &&
+ assert((!(Flags & MachineMemOperand::MOLoad) || NewMI->mayLoad()) &&
"Folded a use to a non-load!");
assert(MFI.getObjectOffset(FI) != -1);
MachineMemOperand *MMO =
@@ -1623,7 +1622,8 @@ bool TargetInstrInfo::isReallyTriviallyReMaterializable(
// If any of the registers accessed are non-constant, conservatively assume
// the instruction is not rematerializable.
for (const MachineOperand &MO : MI.operands()) {
- if (!MO.isReg()) continue;
+ if (!MO.isReg())
+ continue;
Register Reg = MO.getReg();
if (Reg == 0)
continue;
@@ -1663,7 +1663,7 @@ int TargetInstrInfo::getSPAdjust(const MachineInstr &MI) const {
const MachineFunction *MF = MI.getMF();
const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering();
bool StackGrowsDown =
- TFI->getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown;
+ TFI->getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown;
unsigned FrameSetupOpcode = getCallFrameSetupOpcode();
unsigned FrameDestroyOpcode = getCallFrameDestroyOpcode();
@@ -1711,9 +1711,9 @@ bool TargetInstrInfo::usePreRAHazardRecognizer() const {
}
// Default implementation of CreateTargetRAHazardRecognizer.
-ScheduleHazardRecognizer *TargetInstrInfo::
-CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI,
- const ScheduleDAG *DAG) const {
+ScheduleHazardRecognizer *
+TargetInstrInfo::CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI,
+ const ScheduleDAG *DAG) const {
// Dummy hazard recognizer allows all instructions to issue.
return new ScheduleHazardRecognizer();
}
@@ -1725,9 +1725,8 @@ ScheduleHazardRecognizer *TargetInstrInfo::CreateTargetMIHazardRecognizer(
}
// Default implementation of CreateTargetPostRAHazardRecognizer.
-ScheduleHazardRecognizer *TargetInstrInfo::
-CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
- const ScheduleDAG *DAG) const {
+ScheduleHazardRecognizer *TargetInstrInfo::CreateTargetPostRAHazardRecognizer(
+ const InstrItineraryData *II, const ScheduleDAG *DAG) const {
return new ScoreboardHazardRecognizer(II, DAG, "post-RA-sched");
}
@@ -1912,7 +1911,8 @@ TargetInstrInfo::describeLoadedValue(const MachineInstr &MI,
// TODO: Can currently only handle mem instructions with a single define.
// An example from the x86 target:
// ...
- // DIV64m $rsp, 1, $noreg, 24, $noreg, implicit-def dead $rax, implicit-def $rdx
+ // DIV64m $rsp, 1, $noreg, 24, $noreg, implicit-def dead $rax,
+ // implicit-def $rdx
// ...
//
if (MI.getNumExplicitDefs() != 1)
@@ -1961,8 +1961,8 @@ std::optional<unsigned> TargetInstrInfo::getOperandLatency(
bool TargetInstrInfo::getRegSequenceInputs(
const MachineInstr &MI, unsigned DefIdx,
SmallVectorImpl<RegSubRegPairAndIdx> &InputRegs) const {
- assert((MI.isRegSequence() ||
- MI.isRegSequenceLike()) && "Instruction do not have the proper type");
+ assert((MI.isRegSequence() || MI.isRegSequenceLike()) &&
+ "Instruction do not have the proper type");
if (!MI.isRegSequence())
return getRegSequenceLikeInputs(MI, DefIdx, InputRegs);
@@ -1988,8 +1988,8 @@ bool TargetInstrInfo::getRegSequenceInputs(
bool TargetInstrInfo::getExtractSubregInputs(
const MachineInstr &MI, unsigned DefIdx,
RegSubRegPairAndIdx &InputReg) const {
- assert((MI.isExtractSubreg() ||
- MI.isExtractSubregLike()) && "Instruction do not have the proper type");
+ assert((MI.isExtractSubreg() || MI.isExtractSubregLike()) &&
+ "Instruction do not have the proper type");
if (!MI.isExtractSubreg())
return getExtractSubregLikeInputs(MI, DefIdx, InputReg);
@@ -2011,10 +2011,10 @@ bool TargetInstrInfo::getExtractSubregInputs(
}
bool TargetInstrInfo::getInsertSubregInputs(
- const MachineInstr &MI, unsigned DefIdx,
- RegSubRegPair &BaseReg, RegSubRegPairAndIdx &InsertedReg) const {
- assert((MI.isInsertSubreg() ||
- MI.isInsertSubregLike()) && "Instruction do not have the proper type");
+ const MachineInstr &MI, unsigned DefIdx, RegSubRegPair &BaseReg,
+ RegSubRegPairAndIdx &InsertedReg) const {
+ assert((MI.isInsertSubreg() || MI.isInsertSubregLike()) &&
+ "Instruction do not have the proper type");
if (!MI.isInsertSubreg())
return getInsertSubregLikeInputs(MI, DefIdx, BaseReg, InsertedReg);
@@ -2145,13 +2145,13 @@ TargetInstrInfo::getOutliningType(const MachineModuleInfo &MMI,
// Some other special cases.
switch (MI.getOpcode()) {
- case TargetOpcode::IMPLICIT_DEF:
- case TargetOpcode::KILL:
- case TargetOpcode::LIFETIME_START:
- case TargetOpcode::LIFETIME_END:
- return outliner::InstrType::Invisible;
- default:
- break;
+ case TargetOpcode::IMPLICIT_DEF:
+ case TargetOpcode::KILL:
+ case TargetOpcode::LIFETIME_START:
+ case TargetOpcode::LIFETIME_END:
+ return outliner::InstrType::Invisible;
+ default:
+ break;
}
// Is this a terminator for a basic block?
More information about the llvm-commits
mailing list