[llvm] IndVarSimplify: fix high-cost-expand check (PR #125828)

Ramkumar Ramachandra via llvm-commits llvm-commits at lists.llvm.org
Wed Feb 5 06:51:09 PST 2025


https://github.com/artagnon updated https://github.com/llvm/llvm-project/pull/125828

>From d4f7d8d50a983f08e4454ddca34dc71798b86958 Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ramkumar.ramachandra at codasip.com>
Date: Wed, 5 Feb 2025 09:39:28 +0000
Subject: [PATCH 1/4] IndVarSimplify: don't high-cost-expand in genLoopLimit

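Guard against high-cost expansions in genLoopLimit by checking IVLimit
with SCEVExpander::isHighCostExpansion. When the expansion is deemed too
costly, genLoopLimit returns nullptr and linearFunctionTestReplace bails
out without rewriting the exit test.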
---
 llvm/lib/Transforms/Scalar/IndVarSimplify.cpp        | 12 +++++++++---
 .../Transforms/IndVarSimplify/2011-11-01-lftrptr.ll  |  9 +++------
 llvm/test/Transforms/IndVarSimplify/lftr-pr41998.ll  | 12 +++++-------
 .../IndVarSimplify/rewrite-loop-exit-values-phi.ll   |  9 ++++-----
 4 files changed, 21 insertions(+), 21 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
index 9619dfdbf412317..c3a9fa969eb3d31 100644
--- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -912,7 +912,8 @@ static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
 /// is taken ExitCount times.
 static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
                            const SCEV *ExitCount, bool UsePostInc, Loop *L,
-                           SCEVExpander &Rewriter, ScalarEvolution *SE) {
+                           SCEVExpander &Rewriter, ScalarEvolution *SE,
+                           const TargetTransformInfo *TTI) {
   assert(isLoopCounter(IndVar, L, SE));
   assert(ExitCount->getType()->isIntegerTy() && "exit count must be integer");
   const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
@@ -935,6 +936,9 @@ static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
   const SCEV *IVLimit = ARBase->evaluateAtIteration(ExitCount, *SE);
   assert(SE->isLoopInvariant(IVLimit, L) &&
          "Computed iteration count is not loop invariant!");
+  if (Rewriter.isHighCostExpansion(IVLimit, L, SCEVCheapExpansionBudget, TTI,
+                                   IndVar))
+    return nullptr;
   return Rewriter.expandCodeFor(IVLimit, ARBase->getType(),
                                 ExitingBB->getTerminator());
 }
@@ -995,8 +999,10 @@ linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
       BO->setHasNoSignedWrap(AR->hasNoSignedWrap());
   }
 
-  Value *ExitCnt = genLoopLimit(
-      IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE);
+  Value *ExitCnt = genLoopLimit(IndVar, ExitingBB, ExitCount, UsePostInc, L,
+                                Rewriter, SE, TTI);
+  if (!ExitCnt)
+    return false;
   assert(ExitCnt->getType()->isPointerTy() ==
              IndVar->getType()->isPointerTy() &&
          "genLoopLimit missed a cast");
diff --git a/llvm/test/Transforms/IndVarSimplify/2011-11-01-lftrptr.ll b/llvm/test/Transforms/IndVarSimplify/2011-11-01-lftrptr.ll
index cb0be523c70955a..48e5c589797dc98 100644
--- a/llvm/test/Transforms/IndVarSimplify/2011-11-01-lftrptr.ll
+++ b/llvm/test/Transforms/IndVarSimplify/2011-11-01-lftrptr.ll
@@ -158,17 +158,14 @@ define i8 @testnullptrint(ptr %buf, ptr %end) nounwind {
 ; PTR64-NEXT:    [[GUARD:%.*]] = icmp ult i32 0, [[CNT]]
 ; PTR64-NEXT:    br i1 [[GUARD]], label [[PREHEADER:%.*]], label [[EXIT:%.*]]
 ; PTR64:       preheader:
-; PTR64-NEXT:    [[TMP1:%.*]] = add i32 [[EI]], -1
-; PTR64-NEXT:    [[TMP2:%.*]] = sub i32 [[TMP1]], [[BI]]
-; PTR64-NEXT:    [[TMP3:%.*]] = zext i32 [[TMP2]] to i64
-; PTR64-NEXT:    [[TMP4:%.*]] = add nuw nsw i64 [[TMP3]], 1
-; PTR64-NEXT:    [[SCEVGEP:%.*]] = getelementptr i8, ptr null, i64 [[TMP4]]
 ; PTR64-NEXT:    br label [[LOOP:%.*]]
 ; PTR64:       loop:
 ; PTR64-NEXT:    [[P_01_US_US:%.*]] = phi ptr [ null, [[PREHEADER]] ], [ [[GEP:%.*]], [[LOOP]] ]
+; PTR64-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[PREHEADER]] ], [ [[IVNEXT:%.*]], [[LOOP]] ]
 ; PTR64-NEXT:    [[GEP]] = getelementptr inbounds i8, ptr [[P_01_US_US]], i64 1
 ; PTR64-NEXT:    [[SNEXT:%.*]] = load i8, ptr [[GEP]], align 1
-; PTR64-NEXT:    [[EXITCOND:%.*]] = icmp ne ptr [[GEP]], [[SCEVGEP]]
+; PTR64-NEXT:    [[IVNEXT]] = add nuw i32 [[IV]], 1
+; PTR64-NEXT:    [[EXITCOND:%.*]] = icmp ult i32 [[IVNEXT]], [[CNT]]
 ; PTR64-NEXT:    br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT_LOOPEXIT:%.*]]
 ; PTR64:       exit.loopexit:
 ; PTR64-NEXT:    [[SNEXT_LCSSA:%.*]] = phi i8 [ [[SNEXT]], [[LOOP]] ]
diff --git a/llvm/test/Transforms/IndVarSimplify/lftr-pr41998.ll b/llvm/test/Transforms/IndVarSimplify/lftr-pr41998.ll
index b7f4756b2757fdb..636ea9f53042e5f 100644
--- a/llvm/test/Transforms/IndVarSimplify/lftr-pr41998.ll
+++ b/llvm/test/Transforms/IndVarSimplify/lftr-pr41998.ll
@@ -41,17 +41,15 @@ end:
 define void @test_ptr(i32 %start) {
 ; CHECK-LABEL: @test_ptr(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = trunc i32 [[START:%.*]] to i3
-; CHECK-NEXT:    [[TMP1:%.*]] = sub i3 -1, [[TMP0]]
-; CHECK-NEXT:    [[TMP2:%.*]] = zext i3 [[TMP1]] to i64
-; CHECK-NEXT:    [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
-; CHECK-NEXT:    [[UGLYGEP:%.*]] = getelementptr i8, ptr @data, i64 [[TMP3]]
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
-; CHECK-NEXT:    [[P:%.*]] = phi ptr [ @data, [[ENTRY:%.*]] ], [ [[P_INC:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[I_INC:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[P:%.*]] = phi ptr [ @data, [[ENTRY]] ], [ [[P_INC:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[I_INC]] = add nuw i32 [[I]], 1
 ; CHECK-NEXT:    [[P_INC]] = getelementptr inbounds i8, ptr [[P]], i64 1
 ; CHECK-NEXT:    store volatile i8 0, ptr [[P_INC]], align 1
-; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq ptr [[P_INC]], [[UGLYGEP]]
+; CHECK-NEXT:    [[AND:%.*]] = and i32 [[I_INC]], 7
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[AND]], 0
 ; CHECK-NEXT:    br i1 [[EXITCOND]], label [[END:%.*]], label [[LOOP]]
 ; CHECK:       end:
 ; CHECK-NEXT:    ret void
diff --git a/llvm/test/Transforms/IndVarSimplify/rewrite-loop-exit-values-phi.ll b/llvm/test/Transforms/IndVarSimplify/rewrite-loop-exit-values-phi.ll
index 37bc67c23adb756..4112b85e43e20b4 100644
--- a/llvm/test/Transforms/IndVarSimplify/rewrite-loop-exit-values-phi.ll
+++ b/llvm/test/Transforms/IndVarSimplify/rewrite-loop-exit-values-phi.ll
@@ -14,20 +14,20 @@ define dso_local void @hoge() local_unnamed_addr {
 ; CHECK-LABEL: @hoge(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[N:%.*]] = sdiv exact i64 undef, 40
-; CHECK-NEXT:    [[TMP0:%.*]] = sub i64 undef, [[N]]
 ; CHECK-NEXT:    br label [[HEADER:%.*]]
 ; CHECK:       header:
-; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[LATCH:%.*]] ], [ [[TMP0]], [[ENTRY:%.*]] ]
-; CHECK-NEXT:    [[IDX:%.*]] = phi i64 [ [[IDX_NEXT:%.*]], [[LATCH]] ], [ undef, [[ENTRY]] ]
+; CHECK-NEXT:    [[IDX:%.*]] = phi i64 [ [[IDX_NEXT:%.*]], [[LATCH:%.*]] ], [ undef, [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    [[COND:%.*]] = icmp sgt i64 [[N]], [[IDX]]
 ; CHECK-NEXT:    br i1 [[COND]], label [[END:%.*]], label [[INNER_PREHEADER:%.*]]
 ; CHECK:       inner.preheader:
 ; CHECK-NEXT:    br label [[INNER:%.*]]
 ; CHECK:       inner:
 ; CHECK-NEXT:    [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[INNER]] ], [ 0, [[INNER_PREHEADER]] ]
+; CHECK-NEXT:    [[J:%.*]] = phi i64 [ [[J_NEXT:%.*]], [[INNER]] ], [ [[N]], [[INNER_PREHEADER]] ]
 ; CHECK-NEXT:    [[I_NEXT]] = add nuw i64 [[I]], 1
+; CHECK-NEXT:    [[J_NEXT]] = add nsw i64 [[J]], 1
 ; CHECK-NEXT:    store i64 undef, ptr @ptr, align 8
-; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[I_NEXT]], [[INDVARS_IV]]
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp slt i64 [[J]], [[IDX]]
 ; CHECK-NEXT:    br i1 [[EXITCOND]], label [[INNER]], label [[INNER_EXIT:%.*]]
 ; CHECK:       inner_exit:
 ; CHECK-NEXT:    [[INDVAR:%.*]] = phi i64 [ [[I_NEXT]], [[INNER]] ]
@@ -35,7 +35,6 @@ define dso_local void @hoge() local_unnamed_addr {
 ; CHECK-NEXT:    br label [[LATCH]]
 ; CHECK:       latch:
 ; CHECK-NEXT:    [[IDX_NEXT]] = add nsw i64 [[IDX]], -1
-; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], -1
 ; CHECK-NEXT:    br label [[HEADER]]
 ; CHECK:       end:
 ; CHECK-NEXT:    ret void

>From 4c0f4ee54b00398b7cd19d433c3edee76926ef4a Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ramkumar.ramachandra at codasip.com>
Date: Wed, 5 Feb 2025 12:55:09 +0000
Subject: [PATCH 2/4] IndVarSimplify: refactor; fold high-cost check

---
 llvm/lib/Transforms/Scalar/IndVarSimplify.cpp | 114 +++++++++---------
 .../X86/loop-invariant-conditions.ll          |   8 +-
 .../IndVarSimplify/post-inc-range.ll          |  40 +++---
 .../rewrite-loop-exit-values-phi.ll           |   2 +-
 4 files changed, 86 insertions(+), 78 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
index c3a9fa969eb3d31..29813c59afd89bb 100644
--- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -154,8 +154,9 @@ class IndVarSimplify {
   bool rewriteFirstIterationLoopExitValues(Loop *L);
 
   bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
-                                 const SCEV *ExitCount,
-                                 PHINode *IndVar, SCEVExpander &Rewriter);
+                                 const SCEV *ExitCount, PHINode *IndVar,
+                                 Instruction *IncVar, bool UsePostInc,
+                                 SCEVExpander &Rewriter);
 
   bool sinkUnusedInvariants(Loop *L);
 
@@ -907,39 +908,38 @@ static PHINode *FindLoopCounter(Loop *L, BasicBlock *ExitingBB,
   return BestPhi;
 }
 
-/// Insert an IR expression which computes the value held by the IV IndVar
-/// (which must be an loop counter w/unit stride) after the backedge of loop L
-/// is taken ExitCount times.
-static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
-                           const SCEV *ExitCount, bool UsePostInc, Loop *L,
-                           SCEVExpander &Rewriter, ScalarEvolution *SE,
-                           const TargetTransformInfo *TTI) {
-  assert(isLoopCounter(IndVar, L, SE));
-  assert(ExitCount->getType()->isIntegerTy() && "exit count must be integer");
-  const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
-  assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");
-
+static const SCEV *getIVLimit(PHINode *IndVar, const SCEV *ExitCount,
+                              bool UsePostInc, ScalarEvolution *SE) {
   // For integer IVs, truncate the IV before computing the limit unless we
   // know apriori that the limit must be a constant when evaluated in the
   // bitwidth of the IV.  We prefer (potentially) keeping a truncate of the
   // IV in the loop over a (potentially) expensive expansion of the widened
   // exit count add(zext(add)) expression.
+  const SCEVAddRecExpr *AR = cast<SCEVAddRecExpr>(SE->getSCEV(IndVar));
+  assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride");
   if (IndVar->getType()->isIntegerTy() &&
       SE->getTypeSizeInBits(AR->getType()) >
-      SE->getTypeSizeInBits(ExitCount->getType())) {
+          SE->getTypeSizeInBits(ExitCount->getType())) {
     const SCEV *IVInit = AR->getStart();
     if (!isa<SCEVConstant>(IVInit) || !isa<SCEVConstant>(ExitCount))
       AR = cast<SCEVAddRecExpr>(SE->getTruncateExpr(AR, ExitCount->getType()));
   }
+  AR = UsePostInc ? AR->getPostIncExpr(*SE) : AR;
+  return AR->evaluateAtIteration(ExitCount, *SE);
+}
 
-  const SCEVAddRecExpr *ARBase = UsePostInc ? AR->getPostIncExpr(*SE) : AR;
-  const SCEV *IVLimit = ARBase->evaluateAtIteration(ExitCount, *SE);
+/// Insert an IR expression which computes the value held by the IV IndVar
+/// (which must be an loop counter w/unit stride) after the backedge of loop L
+/// is taken ExitCount times.
+static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
+                           const SCEV *ExitCount, bool UsePostInc, Loop *L,
+                           SCEVExpander &Rewriter, ScalarEvolution *SE) {
+  assert(isLoopCounter(IndVar, L, SE));
+  assert(ExitCount->getType()->isIntegerTy() && "exit count must be integer");
+  const SCEV *IVLimit = getIVLimit(IndVar, ExitCount, UsePostInc, SE);
   assert(SE->isLoopInvariant(IVLimit, L) &&
          "Computed iteration count is not loop invariant!");
-  if (Rewriter.isHighCostExpansion(IVLimit, L, SCEVCheapExpansionBudget, TTI,
-                                   IndVar))
-    return nullptr;
-  return Rewriter.expandCodeFor(IVLimit, ARBase->getType(),
+  return Rewriter.expandCodeFor(IVLimit, IVLimit->getType(),
                                 ExitingBB->getTerminator());
 }
 
@@ -948,36 +948,13 @@ static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
 /// able to rewrite the exit tests of any loop where the SCEV analysis can
 /// determine a loop-invariant trip count of the loop, which is actually a much
 /// broader range than just linear tests.
-bool IndVarSimplify::
-linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
-                          const SCEV *ExitCount,
-                          PHINode *IndVar, SCEVExpander &Rewriter) {
-  assert(L->getLoopLatch() && "Loop no longer in simplified form?");
+bool IndVarSimplify::linearFunctionTestReplace(
+    Loop *L, BasicBlock *ExitingBB, const SCEV *ExitCount, PHINode *IndVar,
+    Instruction *IncVar, bool UsePostInc, SCEVExpander &Rewriter) {
   assert(isLoopCounter(IndVar, L, SE));
-  Instruction * const IncVar =
-    cast<Instruction>(IndVar->getIncomingValueForBlock(L->getLoopLatch()));
 
   // Initialize CmpIndVar to the preincremented IV.
-  Value *CmpIndVar = IndVar;
-  bool UsePostInc = false;
-
-  // If the exiting block is the same as the backedge block, we prefer to
-  // compare against the post-incremented value, otherwise we must compare
-  // against the preincremented value.
-  if (ExitingBB == L->getLoopLatch()) {
-    // For pointer IVs, we chose to not strip inbounds which requires us not
-    // to add a potentially UB introducing use.  We need to either a) show
-    // the loop test we're modifying is already in post-inc form, or b) show
-    // that adding a use must not introduce UB.
-    bool SafeToPostInc =
-        IndVar->getType()->isIntegerTy() ||
-        isLoopExitTestBasedOn(IncVar, ExitingBB) ||
-        mustExecuteUBIfPoisonOnPathTo(IncVar, ExitingBB->getTerminator(), DT);
-    if (SafeToPostInc) {
-      UsePostInc = true;
-      CmpIndVar = IncVar;
-    }
-  }
+  Value *CmpIndVar = UsePostInc ? IncVar : IndVar;
 
   // It may be necessary to drop nowrap flags on the incrementing instruction
   // if either LFTR moves from a pre-inc check to a post-inc check (in which
@@ -999,10 +976,8 @@ linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
       BO->setHasNoSignedWrap(AR->hasNoSignedWrap());
   }
 
-  Value *ExitCnt = genLoopLimit(IndVar, ExitingBB, ExitCount, UsePostInc, L,
-                                Rewriter, SE, TTI);
-  if (!ExitCnt)
-    return false;
+  Value *ExitCnt =
+      genLoopLimit(IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE);
   assert(ExitCnt->getType()->isPointerTy() ==
              IndVar->getType()->isPointerTy() &&
          "genLoopLimit missed a cast");
@@ -1974,8 +1949,6 @@ bool IndVarSimplify::run(Loop *L) {
   // If we have a trip count expression, rewrite the loop's exit condition
   // using it.
   if (!DisableLFTR) {
-    BasicBlock *PreHeader = L->getLoopPreheader();
-
     SmallVector<BasicBlock*, 16> ExitingBlocks;
     L->getExitingBlocks(ExitingBlocks);
     for (BasicBlock *ExitingBB : ExitingBlocks) {
@@ -2007,18 +1980,39 @@ bool IndVarSimplify::run(Loop *L) {
       if (!IndVar)
         continue;
 
+      assert(L->getLoopLatch() && "Loop no longer in simplified form?");
+
+      Instruction *IncVar = cast<Instruction>(
+          IndVar->getIncomingValueForBlock(L->getLoopLatch()));
+
+      // For pointer IVs, we chose to not strip inbounds which requires us not
+      // to add a potentially UB introducing use.  We need to either a) show
+      // the loop test we're modifying is already in post-inc form, or b) show
+      // that adding a use must not introduce UB.
+      bool SafeToPostInc =
+          IndVar->getType()->isIntegerTy() ||
+          isLoopExitTestBasedOn(IncVar, ExitingBB) ||
+          mustExecuteUBIfPoisonOnPathTo(IncVar, ExitingBB->getTerminator(), DT);
+
+      // If the exiting block is the same as the backedge block, we prefer to
+      // compare against the post-incremented value, otherwise we must compare
+      // against the preincremented value.
+      bool UsePostInc = ExitingBB == L->getLoopLatch() && SafeToPostInc;
+
+      // IVLimit is the expression that will get expanded later.
+      const SCEV *IVLimit = getIVLimit(IndVar, ExitCount, UsePostInc, SE);
+
       // Avoid high cost expansions.  Note: This heuristic is questionable in
       // that our definition of "high cost" is not exactly principled.
-      if (Rewriter.isHighCostExpansion(ExitCount, L, SCEVCheapExpansionBudget,
-                                       TTI, PreHeader->getTerminator()))
+      if (Rewriter.isHighCostExpansion(IVLimit, L, SCEVCheapExpansionBudget,
+                                       TTI, ExitingBB->getTerminator()))
         continue;
 
-      if (!Rewriter.isSafeToExpand(ExitCount))
+      if (!Rewriter.isSafeToExpand(IVLimit))
         continue;
 
-      Changed |= linearFunctionTestReplace(L, ExitingBB,
-                                           ExitCount, IndVar,
-                                           Rewriter);
+      Changed |= linearFunctionTestReplace(L, ExitingBB, ExitCount, IndVar,
+                                           IncVar, UsePostInc, Rewriter);
     }
   }
   // Clear the rewriter cache, because values that are in the rewriter's cache
diff --git a/llvm/test/Transforms/IndVarSimplify/X86/loop-invariant-conditions.ll b/llvm/test/Transforms/IndVarSimplify/X86/loop-invariant-conditions.ll
index 24ba862eebc4284..2e19311294fcbf2 100644
--- a/llvm/test/Transforms/IndVarSimplify/X86/loop-invariant-conditions.ll
+++ b/llvm/test/Transforms/IndVarSimplify/X86/loop-invariant-conditions.ll
@@ -521,11 +521,13 @@ for.end:                                          ; preds = %if.end, %entry
 define void @test3_neg(i64 %start) {
 ; CHECK-LABEL: @test3_neg(
 ; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[START:%.*]], i64 -1)
+; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[SMAX]], 1
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
-; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ]
-; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], 1
-; CHECK-NEXT:    [[CMP1:%.*]] = icmp slt i64 [[INDVARS_IV]], -1
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[START]], [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[TMP0]]
 ; CHECK-NEXT:    br i1 [[CMP1]], label [[LOOP]], label [[FOR_END:%.*]]
 ; CHECK:       for.end:
 ; CHECK-NEXT:    ret void
diff --git a/llvm/test/Transforms/IndVarSimplify/post-inc-range.ll b/llvm/test/Transforms/IndVarSimplify/post-inc-range.ll
index bbdee0267effb72..9ce529e4403e378 100644
--- a/llvm/test/Transforms/IndVarSimplify/post-inc-range.ll
+++ b/llvm/test/Transforms/IndVarSimplify/post-inc-range.ll
@@ -115,7 +115,8 @@ define void @test_range_metadata(ptr %array_length_ptr, ptr %base,
 ; CHECK-LABEL: @test_range_metadata(
 ; CHECK-NEXT:  for.body.lr.ph:
 ; CHECK-NEXT:    [[TMP0:%.*]] = zext i32 [[START:%.*]] to i64
-; CHECK-NEXT:    [[TMP1:%.*]] = sext i32 [[LIMIT:%.*]] to i64
+; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[START]], 1
+; CHECK-NEXT:    [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[LIMIT:%.*]], i32 [[TMP1]])
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ], [ [[TMP0]], [[FOR_BODY_LR_PH:%.*]] ]
@@ -127,7 +128,8 @@ define void @test_range_metadata(ptr %array_length_ptr, ptr %base,
 ; CHECK-NEXT:    br label [[FOR_INC]]
 ; CHECK:       for.inc:
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
-; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i64 [[INDVARS_IV_NEXT]], [[TMP1]]
+; CHECK-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i32 [[LFTR_WIDEIV]], [[SMAX]]
 ; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END]]
 ; CHECK:       for.end:
 ; CHECK-NEXT:    br label [[EXIT:%.*]]
@@ -221,10 +223,11 @@ define void @test_transitive_use(ptr %base, i32 %limit, i32 %start) {
 ; CHECK-LABEL: @test_transitive_use(
 ; CHECK-NEXT:  for.body.lr.ph:
 ; CHECK-NEXT:    [[TMP0:%.*]] = zext i32 [[START:%.*]] to i64
-; CHECK-NEXT:    [[TMP1:%.*]] = sext i32 [[LIMIT:%.*]] to i64
-; CHECK-NEXT:    [[TMP2:%.*]] = sext i32 [[LIMIT]] to i64
+; CHECK-NEXT:    [[TMP2:%.*]] = sext i32 [[LIMIT:%.*]] to i64
 ; CHECK-NEXT:    [[UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[START]], i32 64)
 ; CHECK-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[UMAX]] to i64
+; CHECK-NEXT:    [[TMP5:%.*]] = add i32 [[START]], 1
+; CHECK-NEXT:    [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[LIMIT]], i32 [[TMP5]])
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ], [ [[TMP0]], [[FOR_BODY_LR_PH:%.*]] ]
@@ -236,13 +239,14 @@ define void @test_transitive_use(ptr %base, i32 %limit, i32 %start) {
 ; CHECK-NEXT:    br i1 [[MUL_WITHIN]], label [[GUARDED:%.*]], label [[CONTINUE_2:%.*]]
 ; CHECK:       guarded:
 ; CHECK-NEXT:    [[TMP4:%.*]] = add nuw nsw i64 [[TMP3]], 1
-; CHECK-NEXT:    [[RESULT:%.*]] = icmp slt i64 [[TMP4]], [[TMP1]]
+; CHECK-NEXT:    [[RESULT:%.*]] = icmp slt i64 [[TMP4]], [[TMP2]]
 ; CHECK-NEXT:    br i1 [[RESULT]], label [[CONTINUE_2]], label [[FOR_END]]
 ; CHECK:       continue.2:
 ; CHECK-NEXT:    br label [[FOR_INC]]
 ; CHECK:       for.inc:
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
-; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i64 [[INDVARS_IV_NEXT]], [[TMP2]]
+; CHECK-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i32 [[LFTR_WIDEIV]], [[SMAX]]
 ; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END]]
 ; CHECK:       for.end:
 ; CHECK-NEXT:    br label [[EXIT:%.*]]
@@ -293,14 +297,16 @@ define void @test_guard_one_bb(ptr %base, i32 %limit, i32 %start) {
 ; CHECK-LABEL: @test_guard_one_bb(
 ; CHECK-NEXT:  for.body.lr.ph:
 ; CHECK-NEXT:    [[TMP0:%.*]] = zext i32 [[START:%.*]] to i64
-; CHECK-NEXT:    [[TMP1:%.*]] = sext i32 [[LIMIT:%.*]] to i64
+; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[START]], 1
+; CHECK-NEXT:    [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[LIMIT:%.*]], i32 [[TMP1]])
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[TMP0]], [[FOR_BODY_LR_PH:%.*]] ]
 ; CHECK-NEXT:    [[WITHIN_LIMITS:%.*]] = icmp ult i64 [[INDVARS_IV]], 64
 ; CHECK-NEXT:    call void (i1, ...) @llvm.experimental.guard(i1 [[WITHIN_LIMITS]]) [ "deopt"() ]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
-; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i64 [[INDVARS_IV_NEXT]], [[TMP1]]
+; CHECK-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i32 [[LFTR_WIDEIV]], [[SMAX]]
 ; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]]
 ; CHECK:       for.end:
 ; CHECK-NEXT:    br label [[EXIT:%.*]]
@@ -333,7 +339,8 @@ define void @test_guard_in_the_same_bb(ptr %base, i32 %limit, i32 %start) {
 ; CHECK-LABEL: @test_guard_in_the_same_bb(
 ; CHECK-NEXT:  for.body.lr.ph:
 ; CHECK-NEXT:    [[TMP0:%.*]] = zext i32 [[START:%.*]] to i64
-; CHECK-NEXT:    [[TMP1:%.*]] = sext i32 [[LIMIT:%.*]] to i64
+; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[START]], 1
+; CHECK-NEXT:    [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[LIMIT:%.*]], i32 [[TMP1]])
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ], [ [[TMP0]], [[FOR_BODY_LR_PH:%.*]] ]
@@ -342,7 +349,8 @@ define void @test_guard_in_the_same_bb(ptr %base, i32 %limit, i32 %start) {
 ; CHECK:       for.inc:
 ; CHECK-NEXT:    call void (i1, ...) @llvm.experimental.guard(i1 [[WITHIN_LIMITS]]) [ "deopt"() ]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
-; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i64 [[INDVARS_IV_NEXT]], [[TMP1]]
+; CHECK-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i32 [[LFTR_WIDEIV]], [[SMAX]]
 ; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]]
 ; CHECK:       for.end:
 ; CHECK-NEXT:    br label [[EXIT:%.*]]
@@ -378,7 +386,8 @@ define void @test_guard_in_idom(ptr %base, i32 %limit, i32 %start) {
 ; CHECK-LABEL: @test_guard_in_idom(
 ; CHECK-NEXT:  for.body.lr.ph:
 ; CHECK-NEXT:    [[TMP0:%.*]] = zext i32 [[START:%.*]] to i64
-; CHECK-NEXT:    [[TMP1:%.*]] = sext i32 [[LIMIT:%.*]] to i64
+; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[START]], 1
+; CHECK-NEXT:    [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[LIMIT:%.*]], i32 [[TMP1]])
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ], [ [[TMP0]], [[FOR_BODY_LR_PH:%.*]] ]
@@ -387,7 +396,8 @@ define void @test_guard_in_idom(ptr %base, i32 %limit, i32 %start) {
 ; CHECK-NEXT:    br label [[FOR_INC]]
 ; CHECK:       for.inc:
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
-; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i64 [[INDVARS_IV_NEXT]], [[TMP1]]
+; CHECK-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i32 [[LFTR_WIDEIV]], [[SMAX]]
 ; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]]
 ; CHECK:       for.end:
 ; CHECK-NEXT:    br label [[EXIT:%.*]]
@@ -423,7 +433,8 @@ define void @test_guard_merge_ranges(ptr %base, i32 %limit, i32 %start) {
 ; CHECK-LABEL: @test_guard_merge_ranges(
 ; CHECK-NEXT:  for.body.lr.ph:
 ; CHECK-NEXT:    [[TMP0:%.*]] = zext i32 [[START:%.*]] to i64
-; CHECK-NEXT:    [[TMP1:%.*]] = sext i32 [[LIMIT:%.*]] to i64
+; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[START]], 1
+; CHECK-NEXT:    [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[LIMIT:%.*]], i32 [[TMP1]])
 ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[TMP0]], [[FOR_BODY_LR_PH:%.*]] ]
@@ -432,7 +443,8 @@ define void @test_guard_merge_ranges(ptr %base, i32 %limit, i32 %start) {
 ; CHECK-NEXT:    [[WITHIN_LIMITS_2:%.*]] = icmp ult i64 [[INDVARS_IV]], 2147483647
 ; CHECK-NEXT:    call void (i1, ...) @llvm.experimental.guard(i1 [[WITHIN_LIMITS_2]]) [ "deopt"() ]
 ; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
-; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i64 [[INDVARS_IV_NEXT]], [[TMP1]]
+; CHECK-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i32 [[LFTR_WIDEIV]], [[SMAX]]
 ; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]]
 ; CHECK:       for.end:
 ; CHECK-NEXT:    br label [[EXIT:%.*]]
diff --git a/llvm/test/Transforms/IndVarSimplify/rewrite-loop-exit-values-phi.ll b/llvm/test/Transforms/IndVarSimplify/rewrite-loop-exit-values-phi.ll
index 4112b85e43e20b4..4c55c8975965ae4 100644
--- a/llvm/test/Transforms/IndVarSimplify/rewrite-loop-exit-values-phi.ll
+++ b/llvm/test/Transforms/IndVarSimplify/rewrite-loop-exit-values-phi.ll
@@ -24,7 +24,7 @@ define dso_local void @hoge() local_unnamed_addr {
 ; CHECK:       inner:
 ; CHECK-NEXT:    [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[INNER]] ], [ 0, [[INNER_PREHEADER]] ]
 ; CHECK-NEXT:    [[J:%.*]] = phi i64 [ [[J_NEXT:%.*]], [[INNER]] ], [ [[N]], [[INNER_PREHEADER]] ]
-; CHECK-NEXT:    [[I_NEXT]] = add nuw i64 [[I]], 1
+; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 1
 ; CHECK-NEXT:    [[J_NEXT]] = add nsw i64 [[J]], 1
 ; CHECK-NEXT:    store i64 undef, ptr @ptr, align 8
 ; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp slt i64 [[J]], [[IDX]]

>From db5d91549da6fe6c77c4f54011778e809ffee64b Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ramkumar.ramachandra at codasip.com>
Date: Wed, 5 Feb 2025 14:36:07 +0000
Subject: [PATCH 3/4] IndVarSimplify: strip stray comment

---
 llvm/lib/Transforms/Scalar/IndVarSimplify.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
index 29813c59afd89bb..a6115e011343ba0 100644
--- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -953,7 +953,6 @@ bool IndVarSimplify::linearFunctionTestReplace(
     Instruction *IncVar, bool UsePostInc, SCEVExpander &Rewriter) {
   assert(isLoopCounter(IndVar, L, SE));
 
-  // Initialize CmpIndVar to the preincremented IV.
   Value *CmpIndVar = UsePostInc ? IncVar : IndVar;
 
   // It may be necessary to drop nowrap flags on the incrementing instruction

>From ea6d845409d26cfc33b33e4db492efc8557b866e Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <ramkumar.ramachandra at codasip.com>
Date: Wed, 5 Feb 2025 14:50:01 +0000
Subject: [PATCH 4/4] IndVarSimplify: NFC refactor

---
 llvm/lib/Transforms/Scalar/IndVarSimplify.cpp | 34 +++++++------------
 1 file changed, 12 insertions(+), 22 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
index a6115e011343ba0..fed5dbacfff99ea 100644
--- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -155,8 +155,8 @@ class IndVarSimplify {
 
   bool linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
                                  const SCEV *ExitCount, PHINode *IndVar,
-                                 Instruction *IncVar, bool UsePostInc,
-                                 SCEVExpander &Rewriter);
+                                 Instruction *IncVar, const SCEV *IVLimit,
+                                 bool UsePostInc, SCEVExpander &Rewriter);
 
   bool sinkUnusedInvariants(Loop *L);
 
@@ -928,21 +928,6 @@ static const SCEV *getIVLimit(PHINode *IndVar, const SCEV *ExitCount,
   return AR->evaluateAtIteration(ExitCount, *SE);
 }
 
-/// Insert an IR expression which computes the value held by the IV IndVar
-/// (which must be an loop counter w/unit stride) after the backedge of loop L
-/// is taken ExitCount times.
-static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
-                           const SCEV *ExitCount, bool UsePostInc, Loop *L,
-                           SCEVExpander &Rewriter, ScalarEvolution *SE) {
-  assert(isLoopCounter(IndVar, L, SE));
-  assert(ExitCount->getType()->isIntegerTy() && "exit count must be integer");
-  const SCEV *IVLimit = getIVLimit(IndVar, ExitCount, UsePostInc, SE);
-  assert(SE->isLoopInvariant(IVLimit, L) &&
-         "Computed iteration count is not loop invariant!");
-  return Rewriter.expandCodeFor(IVLimit, IVLimit->getType(),
-                                ExitingBB->getTerminator());
-}
-
 /// This method rewrites the exit condition of the loop to be a canonical !=
 /// comparison against the incremented loop induction variable.  This pass is
 /// able to rewrite the exit tests of any loop where the SCEV analysis can
@@ -950,7 +935,8 @@ static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
 /// broader range than just linear tests.
 bool IndVarSimplify::linearFunctionTestReplace(
     Loop *L, BasicBlock *ExitingBB, const SCEV *ExitCount, PHINode *IndVar,
-    Instruction *IncVar, bool UsePostInc, SCEVExpander &Rewriter) {
+    Instruction *IncVar, const SCEV *IVLimit, bool UsePostInc,
+    SCEVExpander &Rewriter) {
   assert(isLoopCounter(IndVar, L, SE));
 
   Value *CmpIndVar = UsePostInc ? IncVar : IndVar;
@@ -975,8 +961,11 @@ bool IndVarSimplify::linearFunctionTestReplace(
       BO->setHasNoSignedWrap(AR->hasNoSignedWrap());
   }
 
-  Value *ExitCnt =
-      genLoopLimit(IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE);
+  assert(ExitCount->getType()->isIntegerTy() && "exit count must be integer");
+  assert(SE->isLoopInvariant(IVLimit, L) &&
+         "Computed iteration count is not loop invariant!");
+  Value *ExitCnt = Rewriter.expandCodeFor(IVLimit, IVLimit->getType(),
+                                          ExitingBB->getTerminator());
   assert(ExitCnt->getType()->isPointerTy() ==
              IndVar->getType()->isPointerTy() &&
          "genLoopLimit missed a cast");
@@ -2010,8 +1999,9 @@ bool IndVarSimplify::run(Loop *L) {
       if (!Rewriter.isSafeToExpand(IVLimit))
         continue;
 
-      Changed |= linearFunctionTestReplace(L, ExitingBB, ExitCount, IndVar,
-                                           IncVar, UsePostInc, Rewriter);
+      Changed |=
+          linearFunctionTestReplace(L, ExitingBB, ExitCount, IndVar, IncVar,
+                                    IVLimit, UsePostInc, Rewriter);
     }
   }
   // Clear the rewriter cache, because values that are in the rewriter's cache



More information about the llvm-commits mailing list