[llvm] 70c781f - [SIFoldOperands] Move `isFoldableCopy` into a separate helper, NFC.

Pierre van Houtryve via llvm-commits llvm-commits at lists.llvm.org
Mon Nov 7 23:44:40 PST 2022


Author: Pierre van Houtryve
Date: 2022-11-08T07:44:34Z
New Revision: 70c781f4b6f4b9aa851dbf950b53569a8f8bd1e8

URL: https://github.com/llvm/llvm-project/commit/70c781f4b6f4b9aa851dbf950b53569a8f8bd1e8
DIFF: https://github.com/llvm/llvm-project/commit/70c781f4b6f4b9aa851dbf950b53569a8f8bd1e8.diff

LOG: [SIFoldOperands] Move `isFoldableCopy` into a separate helper, NFC.

There was quite a bit of logic that sat in the middle of the core loop. I think it's easier to follow when it's split off into a separate helper like the others.

Reviewed By: arsenm

Differential Revision: https://reviews.llvm.org/D137538
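
In outline, the main loop in runOnMachineFunction now dispatches to the new helper instead of carrying the copy-folding logic inline. A condensed sketch, paraphrasing the diff below rather than quoting it verbatim:

    for (MachineBasicBlock *MBB : depth_first(&MF)) {
      MachineOperand *CurrentKnownM0Val = nullptr;
      for (auto &MI : make_early_inc_range(*MBB)) {
        // Foldable copies: all of the former inline logic now lives
        // in one helper, symmetrical with tryFoldCndMask & friends.
        if (TII->isFoldableCopy(MI)) {
          Changed |= tryFoldFoldableCopy(MI, CurrentKnownM0Val);
          continue;
        }

        // Unknown clobber of m0: forget the tracked value.
        if (CurrentKnownM0Val && MI.modifiesRegister(AMDGPU::M0, TRI))
          CurrentKnownM0Val = nullptr;

        // ... omod/clamp folding as before ...
      }
    }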

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/SIFoldOperands.cpp

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index 4897f481bf3c9..36a7f8f3ad927 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -111,6 +111,8 @@ class SIFoldOperands : public MachineFunctionPass {
   bool tryFoldCndMask(MachineInstr &MI) const;
   bool tryFoldZeroHighBits(MachineInstr &MI) const;
   bool foldInstOperand(MachineInstr &MI, MachineOperand &OpToFold) const;
+  bool tryFoldFoldableCopy(MachineInstr &MI,
+                           MachineOperand *&CurrentKnownM0Val) const;
 
   const MachineOperand *isClamp(const MachineInstr &MI) const;
   bool tryFoldClamp(MachineInstr &MI);
@@ -1292,6 +1294,73 @@ bool SIFoldOperands::foldInstOperand(MachineInstr &MI,
   return true;
 }
 
+bool SIFoldOperands::tryFoldFoldableCopy(
+    MachineInstr &MI, MachineOperand *&CurrentKnownM0Val) const {
+  // Specially track simple redefs of m0 to the same value in a block, so we
+  // can erase the later ones.
+  if (MI.getOperand(0).getReg() == AMDGPU::M0) {
+    MachineOperand &NewM0Val = MI.getOperand(1);
+    if (CurrentKnownM0Val && CurrentKnownM0Val->isIdenticalTo(NewM0Val)) {
+      MI.eraseFromParent();
+      return true;
+    }
+
+    // We aren't tracking other physical registers
+    CurrentKnownM0Val = (NewM0Val.isReg() && NewM0Val.getReg().isPhysical())
+                            ? nullptr
+                            : &NewM0Val;
+    return false;
+  }
+
+  MachineOperand &OpToFold = MI.getOperand(1);
+  bool FoldingImm = OpToFold.isImm() || OpToFold.isFI() || OpToFold.isGlobal();
+
+  // FIXME: We could also be folding things like TargetIndexes.
+  if (!FoldingImm && !OpToFold.isReg())
+    return false;
+
+  if (OpToFold.isReg() && !OpToFold.getReg().isVirtual())
+    return false;
+
+  // Prevent folding operands backwards in the function. For example,
+  // the COPY opcode must not be replaced by 1 in this example:
+  //
+  //    %3 = COPY %vgpr0; VGPR_32:%3
+  //    ...
+  //    %vgpr0 = V_MOV_B32_e32 1, implicit %exec
+  if (!MI.getOperand(0).getReg().isVirtual())
+    return false;
+
+  bool Changed = foldInstOperand(MI, OpToFold);
+
+  // If we managed to fold all uses of this copy then we might as well
+  // delete it now.
+  // The only reason we need to follow chains of copies here is that
+  // tryFoldRegSequence looks forward through copies before folding a
+  // REG_SEQUENCE into its eventual users.
+  auto *InstToErase = &MI;
+  while (MRI->use_nodbg_empty(InstToErase->getOperand(0).getReg())) {
+    auto &SrcOp = InstToErase->getOperand(1);
+    auto SrcReg = SrcOp.isReg() ? SrcOp.getReg() : Register();
+    InstToErase->eraseFromParent();
+    Changed = true;
+    InstToErase = nullptr;
+    if (!SrcReg || SrcReg.isPhysical())
+      break;
+    InstToErase = MRI->getVRegDef(SrcReg);
+    if (!InstToErase || !TII->isFoldableCopy(*InstToErase))
+      break;
+  }
+
+  if (InstToErase && InstToErase->isRegSequence() &&
+      MRI->use_nodbg_empty(InstToErase->getOperand(0).getReg())) {
+    InstToErase->eraseFromParent();
+    Changed = true;
+  }
+
+  return Changed;
+}
+
 // Clamp patterns are canonically selected to v_max_* instructions, so only
 // handle them.
 const MachineOperand *SIFoldOperands::isClamp(const MachineInstr &MI) const {
@@ -1746,82 +1815,22 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
         continue;
       }
 
-      if (!TII->isFoldableCopy(MI)) {
-        // Saw an unknown clobber of m0, so we no longer know what it is.
-        if (CurrentKnownM0Val && MI.modifiesRegister(AMDGPU::M0, TRI))
-          CurrentKnownM0Val = nullptr;
-
-        // TODO: Omod might be OK if there is NSZ only on the source
-        // instruction, and not the omod multiply.
-        if (IsIEEEMode || (!HasNSZ && !MI.getFlag(MachineInstr::FmNsz)) ||
-            !tryFoldOMod(MI))
-          Changed |= tryFoldClamp(MI);
-
+      if (TII->isFoldableCopy(MI)) {
+        Changed |= tryFoldFoldableCopy(MI, CurrentKnownM0Val);
         continue;
       }
 
-      // Specially track simple redefs of m0 to the same value in a block, so we
-      // can erase the later ones.
-      if (MI.getOperand(0).getReg() == AMDGPU::M0) {
-        MachineOperand &NewM0Val = MI.getOperand(1);
-        if (CurrentKnownM0Val && CurrentKnownM0Val->isIdenticalTo(NewM0Val)) {
-          MI.eraseFromParent();
-          Changed = true;
-          continue;
-        }
-
-        // We aren't tracking other physical registers
-        CurrentKnownM0Val = (NewM0Val.isReg() && NewM0Val.getReg().isPhysical()) ?
-          nullptr : &NewM0Val;
-        continue;
-      }
-
-      MachineOperand &OpToFold = MI.getOperand(1);
-      bool FoldingImm =
-          OpToFold.isImm() || OpToFold.isFI() || OpToFold.isGlobal();
-
-      // FIXME: We could also be folding things like TargetIndexes.
-      if (!FoldingImm && !OpToFold.isReg())
-        continue;
-
-      if (OpToFold.isReg() && !OpToFold.getReg().isVirtual())
-        continue;
+      // Saw an unknown clobber of m0, so we no longer know what it is.
+      if (CurrentKnownM0Val && MI.modifiesRegister(AMDGPU::M0, TRI))
+        CurrentKnownM0Val = nullptr;
 
-      // Prevent folding operands backwards in the function. For example,
-      // the COPY opcode must not be replaced by 1 in this example:
-      //
-      //    %3 = COPY %vgpr0; VGPR_32:%3
-      //    ...
-      //    %vgpr0 = V_MOV_B32_e32 1, implicit %exec
-      if (!MI.getOperand(0).getReg().isVirtual())
-        continue;
-
-      Changed |= foldInstOperand(MI, OpToFold);
-
-      // If we managed to fold all uses of this copy then we might as well
-      // delete it now.
-      // The only reason we need to follow chains of copies here is that
-      // tryFoldRegSequence looks forward through copies before folding a
-      // REG_SEQUENCE into its eventual users.
-      auto *InstToErase = &MI;
-      while (MRI->use_nodbg_empty(InstToErase->getOperand(0).getReg())) {
-        auto &SrcOp = InstToErase->getOperand(1);
-        auto SrcReg = SrcOp.isReg() ? SrcOp.getReg() : Register();
-        InstToErase->eraseFromParent();
-        Changed = true;
-        InstToErase = nullptr;
-        if (!SrcReg || SrcReg.isPhysical())
-          break;
-        InstToErase = MRI->getVRegDef(SrcReg);
-        if (!InstToErase || !TII->isFoldableCopy(*InstToErase))
-          break;
-      }
-      if (InstToErase && InstToErase->isRegSequence() &&
-          MRI->use_nodbg_empty(InstToErase->getOperand(0).getReg())) {
-        InstToErase->eraseFromParent();
-        Changed = true;
-      }
+      // TODO: Omod might be OK if there is NSZ only on the source
+      // instruction, and not the omod multiply.
+      if (IsIEEEMode || (!HasNSZ && !MI.getFlag(MachineInstr::FmNsz)) ||
+          !tryFoldOMod(MI))
+        Changed |= tryFoldClamp(MI);
     }
   }
+
   return Changed;
 }