[llvm] [CodeGen] Do not use subsituteRegister to update implicit def (PR #148068)

Peiming Liu via llvm-commits llvm-commits at lists.llvm.org
Fri Jul 11 08:59:50 PDT 2025


https://github.com/PeimingLiu updated https://github.com/llvm/llvm-project/pull/148068

>From db3674ad0e538e106788331297e3b96d1766cd08 Mon Sep 17 00:00:00 2001
From: Peiming Liu <peiming at modular.com>
Date: Thu, 10 Jul 2025 15:02:07 -0700
Subject: [PATCH 1/3] [CodeGen] Do not use subsituteRegister to update implicit
 def

---
 llvm/lib/CodeGen/TargetInstrInfo.cpp | 29 ++++++++++++++++++++--------
 1 file changed, 21 insertions(+), 8 deletions(-)

diff --git a/llvm/lib/CodeGen/TargetInstrInfo.cpp b/llvm/lib/CodeGen/TargetInstrInfo.cpp
index 660a1a4d7ec47..ce4234bf8a007 100644
--- a/llvm/lib/CodeGen/TargetInstrInfo.cpp
+++ b/llvm/lib/CodeGen/TargetInstrInfo.cpp
@@ -228,6 +228,24 @@ MachineInstr *TargetInstrInfo::commuteInstructionImpl(MachineInstr &MI,
     SubReg0 = SubReg1;
   }
 
+  // For a case like this:
+  //   %0.sub = INST %0.sub(tied), %1.sub, implicit-def %0
+  // we need to update the implicit-def after commuting to result in:
+  //   %1.sub = INST %1.sub(tied), %0.sub, implicit-def %1
+  SmallVector<unsigned> UpdateImplicitDefIdx;
+  if (HasDef && MI.hasImplicitDef() && MI.getOperand(0).getReg() != Reg0) {
+    const TargetRegisterInfo *TRI =
+        MI.getMF()->getSubtarget().getRegisterInfo();
+    Register OrigReg0 = MI.getOperand(0).getReg();
+    for (auto [OpNo, MO] : llvm::enumerate(MI.implicit_operands())) {
+      Register ImplReg = MO.getReg();
+      if ((ImplReg.isVirtual() && ImplReg == OrigReg0) ||
+          (ImplReg.isPhysical() && OrigReg0.isPhysical() &&
+           TRI->isSubRegisterEq(ImplReg, OrigReg0)))
+        UpdateImplicitDefIdx.push_back(OpNo + MI.getNumExplicitOperands());
+    }
+  }
+
   MachineInstr *CommutedMI = nullptr;
   if (NewMI) {
     // Create a new instruction.
@@ -238,15 +256,10 @@ MachineInstr *TargetInstrInfo::commuteInstructionImpl(MachineInstr &MI,
   }
 
   if (HasDef) {
-    // Use `substituteRegister` so that for a case like this:
-    //   %0.sub = INST %0.sub(tied), %1.sub, implicit-def %0
-    // the implicit-def is also updated, to result in:
-    //   %1.sub = INST %1.sub(tied), %0.sub, implicit-def %1
-    const TargetRegisterInfo &TRI =
-        *MI.getMF()->getSubtarget().getRegisterInfo();
-    Register FromReg = CommutedMI->getOperand(0).getReg();
-    CommutedMI->substituteRegister(FromReg, Reg0, /*SubRegIdx=*/0, TRI);
+    CommutedMI->getOperand(0).setReg(Reg0);
     CommutedMI->getOperand(0).setSubReg(SubReg0);
+    for (unsigned Idx : UpdateImplicitDefIdx)
+      CommutedMI->getOperand(Idx).setReg(Reg0);
   }
   CommutedMI->getOperand(Idx2).setReg(Reg1);
   CommutedMI->getOperand(Idx1).setReg(Reg2);

>From 7a347d8410ed566e87f169ec363325fbca99f817 Mon Sep 17 00:00:00 2001
From: Peiming Liu <peiming at modular.com>
Date: Thu, 10 Jul 2025 19:10:36 -0700
Subject: [PATCH 2/3] add test

---
 .../X86/coalesce-commutative-implicit-def.mir | 21 +++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/llvm/test/CodeGen/X86/coalesce-commutative-implicit-def.mir b/llvm/test/CodeGen/X86/coalesce-commutative-implicit-def.mir
index fe1235fe94f85..1f38430f631cc 100644
--- a/llvm/test/CodeGen/X86/coalesce-commutative-implicit-def.mir
+++ b/llvm/test/CodeGen/X86/coalesce-commutative-implicit-def.mir
@@ -35,3 +35,24 @@ body: |
     %0:gr64_with_sub_8bit = COPY %1:gr64_with_sub_8bit
     RET 0, implicit %0
 ...
+# Commuting instruction with 3 ops is handled correctly.
+---
+name: commuting_3_ops
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $ymm0, $ymm1
+
+    ; CHECK-LABEL: name: commuting_3_ops
+    ; CHECK: liveins: $ymm0, $ymm1
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vr256 = COPY $ymm1
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vr256 = COPY $ymm0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vr256 = contract nofpexcept VFMADD213PSYr [[COPY1]], [[COPY]], [[COPY]], implicit $mxcsr
+    ; CHECK-NEXT: RET 0, implicit [[COPY1]]
+    %0:vr256 = COPY $ymm1
+    %1:vr256 = COPY $ymm0
+    %0:vr256 = contract nofpexcept VFMADD231PSYr %0:vr256, %0:vr256, %1:vr256, implicit $mxcsr
+    %1:vr256 = COPY %0:vr256
+    RET 0, implicit %1
+...

>From b6740c7d1556548f6a795b3711890c2abe6da0d8 Mon Sep 17 00:00:00 2001
From: Peiming Liu <peiming at modular.com>
Date: Fri, 11 Jul 2025 08:59:37 -0700
Subject: [PATCH 3/3] address comment

---
 llvm/lib/CodeGen/TargetInstrInfo.cpp | 126 +++++++++++++--------------
 1 file changed, 63 insertions(+), 63 deletions(-)

diff --git a/llvm/lib/CodeGen/TargetInstrInfo.cpp b/llvm/lib/CodeGen/TargetInstrInfo.cpp
index ce4234bf8a007..badb1c43dd6ad 100644
--- a/llvm/lib/CodeGen/TargetInstrInfo.cpp
+++ b/llvm/lib/CodeGen/TargetInstrInfo.cpp
@@ -40,8 +40,8 @@
 using namespace llvm;
 
 static cl::opt<bool> DisableHazardRecognizer(
-  "disable-sched-hazard", cl::Hidden, cl::init(false),
-  cl::desc("Disable hazard detection during preRA scheduling"));
+    "disable-sched-hazard", cl::Hidden, cl::init(false),
+    cl::desc("Disable hazard detection during preRA scheduling"));
 
 static cl::opt<bool> EnableAccReassociation(
     "acc-reassoc", cl::Hidden, cl::init(true),
@@ -58,7 +58,7 @@ static cl::opt<unsigned int> MaxAccumulatorWidth(
 
 TargetInstrInfo::~TargetInstrInfo() = default;
 
-const TargetRegisterClass*
+const TargetRegisterClass *
 TargetInstrInfo::getRegClass(const MCInstrDesc &MCID, unsigned OpNum,
                              const TargetRegisterInfo *TRI,
                              const MachineFunction &MF) const {
@@ -112,9 +112,9 @@ static bool isAsmComment(const char *Str, const MCAsmInfo &MAI) {
 /// simple--i.e. not a logical or arithmetic expression--size values without
 /// the optional fill value. This is primarily used for creating arbitrary
 /// sized inline asm blocks for testing purposes.
-unsigned TargetInstrInfo::getInlineAsmLength(
-  const char *Str,
-  const MCAsmInfo &MAI, const TargetSubtargetInfo *STI) const {
+unsigned
+TargetInstrInfo::getInlineAsmLength(const char *Str, const MCAsmInfo &MAI,
+                                    const TargetSubtargetInfo *STI) const {
   // Count the number of instructions in the asm.
   bool AtInsnStart = true;
   unsigned Length = 0;
@@ -152,9 +152,8 @@ unsigned TargetInstrInfo::getInlineAsmLength(
 
 /// ReplaceTailWithBranchTo - Delete the instruction OldInst and everything
 /// after it, replacing it with an unconditional branch to NewDest.
-void
-TargetInstrInfo::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail,
-                                         MachineBasicBlock *NewDest) const {
+void TargetInstrInfo::ReplaceTailWithBranchTo(
+    MachineBasicBlock::iterator Tail, MachineBasicBlock *NewDest) const {
   MachineBasicBlock *MBB = Tail->getParent();
 
   // Remove all the old successors of MBB from the CFG.
@@ -188,8 +187,10 @@ MachineInstr *TargetInstrInfo::commuteInstructionImpl(MachineInstr &MI,
     // No idea how to commute this instruction. Target should implement its own.
     return nullptr;
 
-  unsigned CommutableOpIdx1 = Idx1; (void)CommutableOpIdx1;
-  unsigned CommutableOpIdx2 = Idx2; (void)CommutableOpIdx2;
+  unsigned CommutableOpIdx1 = Idx1;
+  (void)CommutableOpIdx1;
+  unsigned CommutableOpIdx2 = Idx2;
+  (void)CommutableOpIdx2;
   assert(findCommutedOpIndices(MI, CommutableOpIdx1, CommutableOpIdx2) &&
          CommutableOpIdx1 == Idx1 && CommutableOpIdx2 == Idx2 &&
          "TargetInstrInfo::CommuteInstructionImpl(): not commutable operands.");
@@ -214,38 +215,38 @@ MachineInstr *TargetInstrInfo::commuteInstructionImpl(MachineInstr &MI,
       Reg1.isPhysical() ? MI.getOperand(Idx1).isRenamable() : false;
   bool Reg2IsRenamable =
       Reg2.isPhysical() ? MI.getOperand(Idx2).isRenamable() : false;
-  // If destination is tied to either of the commuted source register, then
-  // it must be updated.
-  if (HasDef && Reg0 == Reg1 &&
-      MI.getDesc().getOperandConstraint(Idx1, MCOI::TIED_TO) == 0) {
-    Reg2IsKill = false;
-    Reg0 = Reg2;
-    SubReg0 = SubReg2;
-  } else if (HasDef && Reg0 == Reg2 &&
-             MI.getDesc().getOperandConstraint(Idx2, MCOI::TIED_TO) == 0) {
-    Reg1IsKill = false;
-    Reg0 = Reg1;
-    SubReg0 = SubReg1;
-  }
 
   // For a case like this:
   //   %0.sub = INST %0.sub(tied), %1.sub, implicit-def %0
   // we need to update the implicit-def after commuting to result in:
   //   %1.sub = INST %1.sub(tied), %0.sub, implicit-def %1
   SmallVector<unsigned> UpdateImplicitDefIdx;
-  if (HasDef && MI.hasImplicitDef() && MI.getOperand(0).getReg() != Reg0) {
+  if (HasDef && MI.hasImplicitDef() && MI.getOperand(0).getReg() == Reg0) {
     const TargetRegisterInfo *TRI =
         MI.getMF()->getSubtarget().getRegisterInfo();
-    Register OrigReg0 = MI.getOperand(0).getReg();
     for (auto [OpNo, MO] : llvm::enumerate(MI.implicit_operands())) {
       Register ImplReg = MO.getReg();
-      if ((ImplReg.isVirtual() && ImplReg == OrigReg0) ||
-          (ImplReg.isPhysical() && OrigReg0.isPhysical() &&
-           TRI->isSubRegisterEq(ImplReg, OrigReg0)))
+      if ((ImplReg.isVirtual() && ImplReg == Reg0) ||
+          (ImplReg.isPhysical() && Reg0.isPhysical() &&
+           TRI->isSubRegisterEq(ImplReg, Reg0)))
         UpdateImplicitDefIdx.push_back(OpNo + MI.getNumExplicitOperands());
     }
   }
 
+  // If destination is tied to either of the commuted source register, then
+  // it must be updated.
+  if (HasDef && Reg0 == Reg1 &&
+      MI.getDesc().getOperandConstraint(Idx1, MCOI::TIED_TO) == 0) {
+    Reg2IsKill = false;
+    Reg0 = Reg2;
+    SubReg0 = SubReg2;
+  } else if (HasDef && Reg0 == Reg2 &&
+             MI.getDesc().getOperandConstraint(Idx2, MCOI::TIED_TO) == 0) {
+    Reg1IsKill = false;
+    Reg0 = Reg1;
+    SubReg0 = SubReg1;
+  }
+
   MachineInstr *CommutedMI = nullptr;
   if (NewMI) {
     // Create a new instruction.
@@ -340,8 +341,8 @@ bool TargetInstrInfo::findCommutedOpIndices(const MachineInstr &MI,
   // is not true, then the target must implement this.
   unsigned CommutableOpIdx1 = MCID.getNumDefs();
   unsigned CommutableOpIdx2 = CommutableOpIdx1 + 1;
-  if (!fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2,
-                            CommutableOpIdx1, CommutableOpIdx2))
+  if (!fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, CommutableOpIdx1,
+                            CommutableOpIdx2))
     return false;
 
   if (!MI.getOperand(SrcOpIdx1).isReg() || !MI.getOperand(SrcOpIdx2).isReg())
@@ -351,7 +352,8 @@ bool TargetInstrInfo::findCommutedOpIndices(const MachineInstr &MI,
 }
 
 bool TargetInstrInfo::isUnpredicatedTerminator(const MachineInstr &MI) const {
-  if (!MI.isTerminator()) return false;
+  if (!MI.isTerminator())
+    return false;
 
   // Conditional branch is a special case.
   if (MI.isBranch() && !MI.isBarrier())
@@ -488,7 +490,7 @@ static const TargetRegisterClass *canFoldCopy(const MachineInstr &MI,
   assert(TII.isCopyInstr(MI) && "MI must be a COPY instruction");
   if (MI.getNumOperands() != 2)
     return nullptr;
-  assert(FoldIdx<2 && "FoldIdx refers no nonexistent operand");
+  assert(FoldIdx < 2 && "FoldIdx refers no nonexistent operand");
 
   const MachineOperand &FoldOp = MI.getOperand(FoldIdx);
   const MachineOperand &LiveOp = MI.getOperand(1 - FoldIdx);
@@ -617,8 +619,7 @@ static MachineInstr *foldPatchpoint(MachineFunction &MF, MachineInstr &MI,
       unsigned SpillSize;
       unsigned SpillOffset;
       // Compute the spill slot size and offset.
-      const TargetRegisterClass *RC =
-        MF.getRegInfo().getRegClass(MO.getReg());
+      const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(MO.getReg());
       bool Valid =
           TII.getStackSlotRange(RC, MO.getSubReg(), SpillSize, SpillOffset, MF);
       if (!Valid)
@@ -763,11 +764,9 @@ MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineInstr &MI,
   if (NewMI) {
     NewMI->setMemRefs(MF, MI.memoperands());
     // Add a memory operand, foldMemoryOperandImpl doesn't do that.
-    assert((!(Flags & MachineMemOperand::MOStore) ||
-            NewMI->mayStore()) &&
+    assert((!(Flags & MachineMemOperand::MOStore) || NewMI->mayStore()) &&
            "Folded a def to a non-store!");
-    assert((!(Flags & MachineMemOperand::MOLoad) ||
-            NewMI->mayLoad()) &&
+    assert((!(Flags & MachineMemOperand::MOLoad) || NewMI->mayLoad()) &&
            "Folded a use to a non-load!");
     assert(MFI.getObjectOffset(FI) != -1);
     MachineMemOperand *MMO =
@@ -1623,7 +1622,8 @@ bool TargetInstrInfo::isReallyTriviallyReMaterializable(
   // If any of the registers accessed are non-constant, conservatively assume
   // the instruction is not rematerializable.
   for (const MachineOperand &MO : MI.operands()) {
-    if (!MO.isReg()) continue;
+    if (!MO.isReg())
+      continue;
     Register Reg = MO.getReg();
     if (Reg == 0)
       continue;
@@ -1663,7 +1663,7 @@ int TargetInstrInfo::getSPAdjust(const MachineInstr &MI) const {
   const MachineFunction *MF = MI.getMF();
   const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering();
   bool StackGrowsDown =
-    TFI->getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown;
+      TFI->getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown;
 
   unsigned FrameSetupOpcode = getCallFrameSetupOpcode();
   unsigned FrameDestroyOpcode = getCallFrameDestroyOpcode();
@@ -1711,9 +1711,9 @@ bool TargetInstrInfo::usePreRAHazardRecognizer() const {
 }
 
 // Default implementation of CreateTargetRAHazardRecognizer.
-ScheduleHazardRecognizer *TargetInstrInfo::
-CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI,
-                             const ScheduleDAG *DAG) const {
+ScheduleHazardRecognizer *
+TargetInstrInfo::CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI,
+                                              const ScheduleDAG *DAG) const {
   // Dummy hazard recognizer allows all instructions to issue.
   return new ScheduleHazardRecognizer();
 }
@@ -1725,9 +1725,8 @@ ScheduleHazardRecognizer *TargetInstrInfo::CreateTargetMIHazardRecognizer(
 }
 
 // Default implementation of CreateTargetPostRAHazardRecognizer.
-ScheduleHazardRecognizer *TargetInstrInfo::
-CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
-                                   const ScheduleDAG *DAG) const {
+ScheduleHazardRecognizer *TargetInstrInfo::CreateTargetPostRAHazardRecognizer(
+    const InstrItineraryData *II, const ScheduleDAG *DAG) const {
   return new ScoreboardHazardRecognizer(II, DAG, "post-RA-sched");
 }
 
@@ -1912,7 +1911,8 @@ TargetInstrInfo::describeLoadedValue(const MachineInstr &MI,
     // TODO: Can currently only handle mem instructions with a single define.
     // An example from the x86 target:
     //    ...
-    //    DIV64m $rsp, 1, $noreg, 24, $noreg, implicit-def dead $rax, implicit-def $rdx
+    //    DIV64m $rsp, 1, $noreg, 24, $noreg, implicit-def dead $rax,
+    //    implicit-def $rdx
     //    ...
     //
     if (MI.getNumExplicitDefs() != 1)
@@ -1961,8 +1961,8 @@ std::optional<unsigned> TargetInstrInfo::getOperandLatency(
 bool TargetInstrInfo::getRegSequenceInputs(
     const MachineInstr &MI, unsigned DefIdx,
     SmallVectorImpl<RegSubRegPairAndIdx> &InputRegs) const {
-  assert((MI.isRegSequence() ||
-          MI.isRegSequenceLike()) && "Instruction do not have the proper type");
+  assert((MI.isRegSequence() || MI.isRegSequenceLike()) &&
+         "Instruction do not have the proper type");
 
   if (!MI.isRegSequence())
     return getRegSequenceLikeInputs(MI, DefIdx, InputRegs);
@@ -1988,8 +1988,8 @@ bool TargetInstrInfo::getRegSequenceInputs(
 bool TargetInstrInfo::getExtractSubregInputs(
     const MachineInstr &MI, unsigned DefIdx,
     RegSubRegPairAndIdx &InputReg) const {
-  assert((MI.isExtractSubreg() ||
-      MI.isExtractSubregLike()) && "Instruction do not have the proper type");
+  assert((MI.isExtractSubreg() || MI.isExtractSubregLike()) &&
+         "Instruction do not have the proper type");
 
   if (!MI.isExtractSubreg())
     return getExtractSubregLikeInputs(MI, DefIdx, InputReg);
@@ -2011,10 +2011,10 @@ bool TargetInstrInfo::getExtractSubregInputs(
 }
 
 bool TargetInstrInfo::getInsertSubregInputs(
-    const MachineInstr &MI, unsigned DefIdx,
-    RegSubRegPair &BaseReg, RegSubRegPairAndIdx &InsertedReg) const {
-  assert((MI.isInsertSubreg() ||
-      MI.isInsertSubregLike()) && "Instruction do not have the proper type");
+    const MachineInstr &MI, unsigned DefIdx, RegSubRegPair &BaseReg,
+    RegSubRegPairAndIdx &InsertedReg) const {
+  assert((MI.isInsertSubreg() || MI.isInsertSubregLike()) &&
+         "Instruction do not have the proper type");
 
   if (!MI.isInsertSubreg())
     return getInsertSubregLikeInputs(MI, DefIdx, BaseReg, InsertedReg);
@@ -2145,13 +2145,13 @@ TargetInstrInfo::getOutliningType(const MachineModuleInfo &MMI,
 
   // Some other special cases.
   switch (MI.getOpcode()) {
-    case TargetOpcode::IMPLICIT_DEF:
-    case TargetOpcode::KILL:
-    case TargetOpcode::LIFETIME_START:
-    case TargetOpcode::LIFETIME_END:
-      return outliner::InstrType::Invisible;
-    default:
-      break;
+  case TargetOpcode::IMPLICIT_DEF:
+  case TargetOpcode::KILL:
+  case TargetOpcode::LIFETIME_START:
+  case TargetOpcode::LIFETIME_END:
+    return outliner::InstrType::Invisible;
+  default:
+    break;
   }
 
   // Is this a terminator for a basic block?



More information about the llvm-commits mailing list