[llvm] [LSR] Split the -lsr-term-fold transformation into it's own pass (PR #104234)

via llvm-commits llvm-commits at lists.llvm.org
Wed Aug 14 13:53:05 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-llvm-transforms

Author: Philip Reames (preames)

<details>
<summary>Changes</summary>

This transformation doesn't actually use any of the internal state of
LSR and recomputes all information from SCEV.  Splitting it out makes
it easier to test.
    
Note that long term I would like to write a version of this transform
which *is* integrated with LSR's solver, but if that happens, we'll
just delete the extra pass.
    
Integration wise, I switched from using TTI to using a pass configuration
variable.  This seems slightly more idiomatic, and means we don't run
the extra logic on any target other than RISCV.

---

Patch is 43.91 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/104234.diff


25 Files Affected:

- (modified) llvm/include/llvm/Analysis/TargetTransformInfo.h (-9) 
- (modified) llvm/include/llvm/Analysis/TargetTransformInfoImpl.h (-2) 
- (modified) llvm/include/llvm/CodeGen/BasicTTIImpl.h (-5) 
- (modified) llvm/include/llvm/CodeGen/TargetPassConfig.h (+3) 
- (modified) llvm/include/llvm/InitializePasses.h (+1) 
- (modified) llvm/include/llvm/LinkAllPasses.h (+1) 
- (modified) llvm/include/llvm/Passes/MachinePassRegistry.def (+1) 
- (modified) llvm/include/llvm/Transforms/Scalar.h (+8) 
- (added) llvm/include/llvm/Transforms/Scalar/LoopTermFold.h (+30) 
- (modified) llvm/lib/Analysis/TargetTransformInfo.cpp (-4) 
- (modified) llvm/lib/CodeGen/TargetPassConfig.cpp (+2) 
- (modified) llvm/lib/Passes/PassBuilder.cpp (+1) 
- (modified) llvm/lib/Passes/PassRegistry.def (+1) 
- (modified) llvm/lib/Target/RISCV/RISCVTargetMachine.cpp (+1) 
- (modified) llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h (-4) 
- (modified) llvm/lib/Transforms/Scalar/CMakeLists.txt (+1) 
- (modified) llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp (+1-262) 
- (added) llvm/lib/Transforms/Scalar/LoopTermFold.cpp (+386) 
- (modified) llvm/lib/Transforms/Scalar/Scalar.cpp (+1) 
- (modified) llvm/test/CodeGen/RISCV/O3-pipeline.ll (+1) 
- (modified) llvm/test/Transforms/LoopStrengthReduce/RISCV/lsr-cost-compare.ll (+1-1) 
- (modified) llvm/test/Transforms/LoopStrengthReduce/RISCV/term-fold-crash.ll (+1-1) 
- (modified) llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold-negative-testcase.ll (+1-1) 
- (modified) llvm/test/Transforms/LoopStrengthReduce/lsr-term-fold.ll (+1-1) 
- (modified) llvm/test/Transforms/LoopStrengthReduce/lsr-unreachable-bb-phi-node.ll (+3-2) 


``````````diff
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 38e8b9da213974..2c998f3e3a73cd 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -742,11 +742,6 @@ class TargetTransformInfo {
   /// cost should return false, otherwise return true.
   bool isNumRegsMajorCostOfLSR() const;
 
-  /// Return true if LSR should attempts to replace a use of an otherwise dead
-  /// primary IV in the latch condition with another IV available in the loop.
-  /// When successful, makes the primary IV dead.
-  bool shouldFoldTerminatingConditionAfterLSR() const;
-
   /// Return true if LSR should drop a found solution if it's calculated to be
   /// less profitable than the baseline.
   bool shouldDropLSRSolutionIfLessProfitable() const;
@@ -1891,7 +1886,6 @@ class TargetTransformInfo::Concept {
   virtual bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
                              const TargetTransformInfo::LSRCost &C2) = 0;
   virtual bool isNumRegsMajorCostOfLSR() = 0;
-  virtual bool shouldFoldTerminatingConditionAfterLSR() const = 0;
   virtual bool shouldDropLSRSolutionIfLessProfitable() const = 0;
   virtual bool isProfitableLSRChainElement(Instruction *I) = 0;
   virtual bool canMacroFuseCmp() = 0;
@@ -2370,9 +2364,6 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
   bool isNumRegsMajorCostOfLSR() override {
     return Impl.isNumRegsMajorCostOfLSR();
   }
-  bool shouldFoldTerminatingConditionAfterLSR() const override {
-    return Impl.shouldFoldTerminatingConditionAfterLSR();
-  }
   bool shouldDropLSRSolutionIfLessProfitable() const override {
     return Impl.shouldDropLSRSolutionIfLessProfitable();
   }
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index d208a710bb27fd..11b07ac0b7fc47 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -244,8 +244,6 @@ class TargetTransformInfoImplBase {
 
   bool isNumRegsMajorCostOfLSR() const { return true; }
 
-  bool shouldFoldTerminatingConditionAfterLSR() const { return false; }
-
   bool shouldDropLSRSolutionIfLessProfitable() const { return false; }
 
   bool isProfitableLSRChainElement(Instruction *I) const { return false; }
diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
index 279cfb5aa47d6f..a57bd876561b60 100644
--- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h
+++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h
@@ -394,11 +394,6 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
     return TargetTransformInfoImplBase::isNumRegsMajorCostOfLSR();
   }
 
-  bool shouldFoldTerminatingConditionAfterLSR() const {
-    return TargetTransformInfoImplBase::
-        shouldFoldTerminatingConditionAfterLSR();
-  }
-
   bool shouldDropLSRSolutionIfLessProfitable() const {
     return TargetTransformInfoImplBase::shouldDropLSRSolutionIfLessProfitable();
   }
diff --git a/llvm/include/llvm/CodeGen/TargetPassConfig.h b/llvm/include/llvm/CodeGen/TargetPassConfig.h
index d00e0bed91a457..ecff65c483d8d8 100644
--- a/llvm/include/llvm/CodeGen/TargetPassConfig.h
+++ b/llvm/include/llvm/CodeGen/TargetPassConfig.h
@@ -140,6 +140,9 @@ class TargetPassConfig : public ImmutablePass {
   /// callers.
   bool RequireCodeGenSCCOrder = false;
 
+  /// Enable LoopTailFold immediately after LSR
+  bool EnableLoopTailFold = false;
+
   /// Add the actual instruction selection passes. This does not include
   /// preparation passes on IR.
   bool addCoreISelPasses();
diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h
index 261912aab3076c..3bf1840cf9a0fe 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -168,6 +168,7 @@ void initializeLoopInfoWrapperPassPass(PassRegistry&);
 void initializeLoopPassPass(PassRegistry&);
 void initializeLoopSimplifyPass(PassRegistry&);
 void initializeLoopStrengthReducePass(PassRegistry&);
+void initializeLoopTermFoldPass(PassRegistry&);
 void initializeLoopUnrollPass(PassRegistry&);
 void initializeLowerAtomicLegacyPassPass(PassRegistry&);
 void initializeLowerConstantIntrinsicsPass(PassRegistry&);
diff --git a/llvm/include/llvm/LinkAllPasses.h b/llvm/include/llvm/LinkAllPasses.h
index c00e425b131987..b490e816f740ca 100644
--- a/llvm/include/llvm/LinkAllPasses.h
+++ b/llvm/include/llvm/LinkAllPasses.h
@@ -88,6 +88,7 @@ namespace {
       (void) llvm::createLoopExtractorPass();
       (void) llvm::createLoopSimplifyPass();
       (void) llvm::createLoopStrengthReducePass();
+      (void) llvm::createLoopTermFoldPass();
       (void)llvm::createLoopUnrollPass();
       (void) llvm::createLowerGlobalDtorsLegacyPass();
       (void) llvm::createLowerInvokePass();
diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def b/llvm/include/llvm/Passes/MachinePassRegistry.def
index 8e669ee5791239..05baf514fa7210 100644
--- a/llvm/include/llvm/Passes/MachinePassRegistry.def
+++ b/llvm/include/llvm/Passes/MachinePassRegistry.def
@@ -79,6 +79,7 @@ FUNCTION_PASS("win-eh-prepare", WinEHPreparePass())
 #define LOOP_PASS(NAME, CREATE_PASS)
 #endif
 LOOP_PASS("loop-reduce", LoopStrengthReducePass())
+LOOP_PASS("loop-term-fold", LoopTermFoldPass())
 #undef LOOP_PASS
 
 #ifndef MACHINE_MODULE_PASS
diff --git a/llvm/include/llvm/Transforms/Scalar.h b/llvm/include/llvm/Transforms/Scalar.h
index 98d0adca355214..17f4327eb3e1ab 100644
--- a/llvm/include/llvm/Transforms/Scalar.h
+++ b/llvm/include/llvm/Transforms/Scalar.h
@@ -51,6 +51,14 @@ Pass *createLICMPass();
 //
 Pass *createLoopStrengthReducePass();
 
+//===----------------------------------------------------------------------===//
+//
+// LoopTermFold -  This pass attempts to eliminate the last use of an IV in
+// a loop terminator instruction by rewriting it in terms of another IV.
+// Expected to be run immediately after LSR.
+//
+Pass *createLoopTermFoldPass();
+
 //===----------------------------------------------------------------------===//
 //
 // LoopUnroll - This pass is a simple loop unrolling pass.
diff --git a/llvm/include/llvm/Transforms/Scalar/LoopTermFold.h b/llvm/include/llvm/Transforms/Scalar/LoopTermFold.h
new file mode 100644
index 00000000000000..92c3ed35152756
--- /dev/null
+++ b/llvm/include/llvm/Transforms/Scalar/LoopTermFold.h
@@ -0,0 +1,30 @@
+//===- LoopTermFold.h - Loop Term Fold Pass ---------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_SCALAR_LOOPTERMFOLD_H
+#define LLVM_TRANSFORMS_SCALAR_LOOPTERMFOLD_H
+
+#include "llvm/Analysis/LoopAnalysisManager.h"
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+class Loop;
+class LPMUpdater;
+
+class LoopTermFoldPass : public PassInfoMixin<LoopTermFoldPass> {
+public:
+  PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM,
+                        LoopStandardAnalysisResults &AR, LPMUpdater &U);
+};
+
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_SCALAR_LOOPTERMFOLD_H
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index dcde78925bfa98..2c26493bd3f1ca 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -427,10 +427,6 @@ bool TargetTransformInfo::isNumRegsMajorCostOfLSR() const {
   return TTIImpl->isNumRegsMajorCostOfLSR();
 }
 
-bool TargetTransformInfo::shouldFoldTerminatingConditionAfterLSR() const {
-  return TTIImpl->shouldFoldTerminatingConditionAfterLSR();
-}
-
 bool TargetTransformInfo::shouldDropLSRSolutionIfLessProfitable() const {
   return TTIImpl->shouldDropLSRSolutionIfLessProfitable();
 }
diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp
index 1b0012b65b80d4..45ddc941b5e29b 100644
--- a/llvm/lib/CodeGen/TargetPassConfig.cpp
+++ b/llvm/lib/CodeGen/TargetPassConfig.cpp
@@ -828,6 +828,8 @@ void TargetPassConfig::addIRPasses() {
     if (!DisableLSR) {
       addPass(createCanonicalizeFreezeInLoopsPass());
       addPass(createLoopStrengthReducePass());
+      if (EnableLoopTailFold)
+        addPass(createLoopTermFoldPass());
       if (PrintLSR)
         addPass(createPrintFunctionPass(dbgs(),
                                         "\n\n*** Code after LSR ***\n"));
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index 1859fde279c98d..2ba9e14b91c098 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -248,6 +248,7 @@
 #include "llvm/Transforms/Scalar/LoopSimplifyCFG.h"
 #include "llvm/Transforms/Scalar/LoopSink.h"
 #include "llvm/Transforms/Scalar/LoopStrengthReduce.h"
+#include "llvm/Transforms/Scalar/LoopTermFold.h"
 #include "llvm/Transforms/Scalar/LoopUnrollAndJamPass.h"
 #include "llvm/Transforms/Scalar/LoopUnrollPass.h"
 #include "llvm/Transforms/Scalar/LoopVersioningLICM.h"
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index 5ef8ba30944470..7cd944f3cee0b6 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -641,6 +641,7 @@ LOOP_PASS("loop-idiom-vectorize", LoopIdiomVectorizePass())
 LOOP_PASS("loop-instsimplify", LoopInstSimplifyPass())
 LOOP_PASS("loop-predication", LoopPredicationPass())
 LOOP_PASS("loop-reduce", LoopStrengthReducePass())
+LOOP_PASS("loop-term-fold", LoopTermFoldPass())
 LOOP_PASS("loop-simplifycfg", LoopSimplifyCFGPass())
 LOOP_PASS("loop-unroll-full", LoopFullUnrollPass())
 LOOP_PASS("loop-versioning-licm", LoopVersioningLICMPass())
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
index b6884321f08411..e0ef5c91520ab8 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -336,6 +336,7 @@ class RISCVPassConfig : public TargetPassConfig {
     if (TM.getOptLevel() != CodeGenOptLevel::None)
       substitutePass(&PostRASchedulerID, &PostMachineSchedulerID);
     setEnableSinkAndFold(EnableSinkFold);
+    EnableLoopTailFold = true;
   }
 
   RISCVTargetMachine &getRISCVTargetMachine() const {
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
index 9c37a4f6ec2d04..b76c77fbee389d 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
@@ -394,10 +394,6 @@ class RISCVTTIImpl : public BasicTTIImplBase<RISCVTTIImpl> {
   bool isLSRCostLess(const TargetTransformInfo::LSRCost &C1,
                      const TargetTransformInfo::LSRCost &C2);
 
-  bool shouldFoldTerminatingConditionAfterLSR() const {
-    return true;
-  }
-
   std::optional<unsigned> getMinPageSize() const { return 4096; }
 };
 
diff --git a/llvm/lib/Transforms/Scalar/CMakeLists.txt b/llvm/lib/Transforms/Scalar/CMakeLists.txt
index ba09ebf8b04c4c..939a1457239567 100644
--- a/llvm/lib/Transforms/Scalar/CMakeLists.txt
+++ b/llvm/lib/Transforms/Scalar/CMakeLists.txt
@@ -44,6 +44,7 @@ add_llvm_component_library(LLVMScalarOpts
   LoopRotation.cpp
   LoopSimplifyCFG.cpp
   LoopStrengthReduce.cpp
+  LoopTermFold.cpp
   LoopUnrollPass.cpp
   LoopUnrollAndJamPass.cpp
   LoopVersioningLICM.cpp
diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 91461d1ed27592..75754eb3a46714 100644
--- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -81,6 +81,7 @@
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/CodeGen/Passes.h"
 #include "llvm/Config/llvm-config.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/Constant.h"
@@ -189,10 +190,6 @@ static cl::opt<unsigned> SetupCostDepthLimit(
     "lsr-setupcost-depth-limit", cl::Hidden, cl::init(7),
     cl::desc("The limit on recursion depth for LSRs setup cost"));
 
-static cl::opt<cl::boolOrDefault> AllowTerminatingConditionFoldingAfterLSR(
-    "lsr-term-fold", cl::Hidden,
-    cl::desc("Attempt to replace primary IV with other IV."));
-
 static cl::opt<cl::boolOrDefault> AllowDropSolutionIfLessProfitable(
     "lsr-drop-solution", cl::Hidden,
     cl::desc("Attempt to drop solution if it is less profitable"));
@@ -205,9 +202,6 @@ static cl::opt<bool> DropScaledForVScale(
     "lsr-drop-scaled-reg-for-vscale", cl::Hidden, cl::init(true),
     cl::desc("Avoid using scaled registers with vscale-relative addressing"));
 
-STATISTIC(NumTermFold,
-          "Number of terminating condition fold recognized and performed");
-
 #ifndef NDEBUG
 // Stress test IV chain generation.
 static cl::opt<bool> StressIVChain(
@@ -7062,186 +7056,6 @@ static llvm::PHINode *GetInductionVariable(const Loop &L, ScalarEvolution &SE,
   return nullptr;
 }
 
-static std::optional<std::tuple<PHINode *, PHINode *, const SCEV *, bool>>
-canFoldTermCondOfLoop(Loop *L, ScalarEvolution &SE, DominatorTree &DT,
-                      const LoopInfo &LI, const TargetTransformInfo &TTI) {
-  if (!L->isInnermost()) {
-    LLVM_DEBUG(dbgs() << "Cannot fold on non-innermost loop\n");
-    return std::nullopt;
-  }
-  // Only inspect on simple loop structure
-  if (!L->isLoopSimplifyForm()) {
-    LLVM_DEBUG(dbgs() << "Cannot fold on non-simple loop\n");
-    return std::nullopt;
-  }
-
-  if (!SE.hasLoopInvariantBackedgeTakenCount(L)) {
-    LLVM_DEBUG(dbgs() << "Cannot fold on backedge that is loop variant\n");
-    return std::nullopt;
-  }
-
-  BasicBlock *LoopLatch = L->getLoopLatch();
-  BranchInst *BI = dyn_cast<BranchInst>(LoopLatch->getTerminator());
-  if (!BI || BI->isUnconditional())
-    return std::nullopt;
-  auto *TermCond = dyn_cast<ICmpInst>(BI->getCondition());
-  if (!TermCond) {
-    LLVM_DEBUG(
-        dbgs() << "Cannot fold on branching condition that is not an ICmpInst");
-    return std::nullopt;
-  }
-  if (!TermCond->hasOneUse()) {
-    LLVM_DEBUG(
-        dbgs()
-        << "Cannot replace terminating condition with more than one use\n");
-    return std::nullopt;
-  }
-
-  BinaryOperator *LHS = dyn_cast<BinaryOperator>(TermCond->getOperand(0));
-  Value *RHS = TermCond->getOperand(1);
-  if (!LHS || !L->isLoopInvariant(RHS))
-    // We could pattern match the inverse form of the icmp, but that is
-    // non-canonical, and this pass is running *very* late in the pipeline.
-    return std::nullopt;
-
-  // Find the IV used by the current exit condition.
-  PHINode *ToFold;
-  Value *ToFoldStart, *ToFoldStep;
-  if (!matchSimpleRecurrence(LHS, ToFold, ToFoldStart, ToFoldStep))
-    return std::nullopt;
-
-  // Ensure the simple recurrence is a part of the current loop.
-  if (ToFold->getParent() != L->getHeader())
-    return std::nullopt;
-
-  // If that IV isn't dead after we rewrite the exit condition in terms of
-  // another IV, there's no point in doing the transform.
-  if (!isAlmostDeadIV(ToFold, LoopLatch, TermCond))
-    return std::nullopt;
-
-  // Inserting instructions in the preheader has a runtime cost, scale
-  // the allowed cost with the loops trip count as best we can.
-  const unsigned ExpansionBudget = [&]() {
-    unsigned Budget = 2 * SCEVCheapExpansionBudget;
-    if (unsigned SmallTC = SE.getSmallConstantMaxTripCount(L))
-      return std::min(Budget, SmallTC);
-    if (std::optional<unsigned> SmallTC = getLoopEstimatedTripCount(L))
-      return std::min(Budget, *SmallTC);
-    // Unknown trip count, assume long running by default.
-    return Budget;
-  }();
-
-  const SCEV *BECount = SE.getBackedgeTakenCount(L);
-  const DataLayout &DL = L->getHeader()->getDataLayout();
-  SCEVExpander Expander(SE, DL, "lsr_fold_term_cond");
-
-  PHINode *ToHelpFold = nullptr;
-  const SCEV *TermValueS = nullptr;
-  bool MustDropPoison = false;
-  auto InsertPt = L->getLoopPreheader()->getTerminator();
-  for (PHINode &PN : L->getHeader()->phis()) {
-    if (ToFold == &PN)
-      continue;
-
-    if (!SE.isSCEVable(PN.getType())) {
-      LLVM_DEBUG(dbgs() << "IV of phi '" << PN
-                        << "' is not SCEV-able, not qualified for the "
-                           "terminating condition folding.\n");
-      continue;
-    }
-    const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(&PN));
-    // Only speculate on affine AddRec
-    if (!AddRec || !AddRec->isAffine()) {
-      LLVM_DEBUG(dbgs() << "SCEV of phi '" << PN
-                        << "' is not an affine add recursion, not qualified "
-                           "for the terminating condition folding.\n");
-      continue;
-    }
-
-    // Check that we can compute the value of AddRec on the exiting iteration
-    // without soundness problems.  evaluateAtIteration internally needs
-    // to multiply the stride of the iteration number - which may wrap around.
-    // The issue here is subtle because computing the result accounting for
-    // wrap is insufficient. In order to use the result in an exit test, we
-    // must also know that AddRec doesn't take the same value on any previous
-    // iteration. The simplest case to consider is a candidate IV which is
-    // narrower than the trip count (and thus original IV), but this can
-    // also happen due to non-unit strides on the candidate IVs.
-    if (!AddRec->hasNoSelfWrap() ||
-        !SE.isKnownNonZero(AddRec->getStepRecurrence(SE)))
-      continue;
-
-    const SCEVAddRecExpr *PostInc = AddRec->getPostIncExpr(SE);
-    const SCEV *TermValueSLocal = PostInc->evaluateAtIteration(BECount, SE);
-    if (!Expander.isSafeToExpand(TermValueSLocal)) {
-      LLVM_DEBUG(
-          dbgs() << "Is not safe to expand terminating value for phi node" << PN
-                 << "\n");
-      continue;
-    }
-
-    if (Expander.isHighCostExpansion(TermValueSLocal, L, ExpansionBudget,
-                                     &TTI, InsertPt)) {
-      LLVM_DEBUG(
-          dbgs() << "Is too expensive to expand terminating value for phi node"
-                 << PN << "\n");
-      continue;
-    }
-
-    // The candidate IV may have been otherwise dead and poison from the
-    // very first iteration.  If we can't disprove that, we can't use the IV.
-    if (!mustExecuteUBIfPoisonOnPathTo(&PN, LoopLatch->getTerminator(), &DT)) {
-      LLVM_DEBUG(dbgs() << "Can not prove poison safety for IV "
-                        << PN << "\n");
-      continue;
-    }
-
-    // The candidate IV may become poison on the last iteration.  If this
-    // value is not branched on, this is a well defined program.  We're
-    // about to add a new use to this IV, and we have to ensure we don't
-    // insert UB which didn't previously exist.
-    bool MustDropPoisonLocal = false;
-    Instruction *PostIncV =
-      cast<Instruction>(PN.getIncomingValueForBlock(LoopLatch));
-    if (!mustExecuteUBIfPoisonOnPathTo(PostIncV, LoopLatch->getTerminator(),
-                                       &DT)) {
-      LLVM_DEBUG(dbgs() << "Can not prove poison safety to insert use"
-                        << PN << "\n");
-
-      // If this is a complex recurrance with multiple instructions computing
-      // the backedge value, we might need to strip poison flags from all of
-      // them.
-      if (PostIncV->getOperand(0) != &PN)
-        continue;
-
-      // In order to perform the transform, we need to drop the poison generating
-      // flags on this instruction (if any).
-      MustDropPoisonLocal = PostIncV->hasPoisonGeneratingFlags();
-    }
-
-    // We pick the last legal alternate IV.  We could expore choosing an optimal
-    // alternate IV if we had a decent heuristic to do so.
-    ToHelpFold = &PN;
-    TermValueS = TermValueSLocal;
-    MustDropPoison = MustDropPoisonLocal;
-  }
-
-  LLVM_DEBUG(if (ToFold && !ToHelpFold) dbgs()
-                 << "Cannot find other AddRec IV to help folding\n";);
-
-  LLVM_DEBUG(if (ToFold && ToHelpFold) dbgs()
-             << "\nFound loop that can fold terminati...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/104234


More information about the llvm-commits mailing list