[llvm] [IndVarSimplify] Add safety check for getTruncateExpr in genLoopLimit (PR #181296)

Thu Feb 12 19:18:17 PST 2026

https://github.com/gandhi56 created https://github.com/llvm/llvm-project/pull/181296

getTruncateExpr may not always return a SCEVAddRecExpr when truncating
loop bounds. Add a check to verify the result type before casting, and
bail out of the transformation if the cast would be invalid.

This prevents potential crashes from invalid casts when dealing with
complex loop bounds.


>From c3741d0aac35c2dcfc41bc95c647300b16bf2912 Mon Sep 17 00:00:00 2001
From: Anshil Gandhi <Anshil.Gandhi at amd.com>
Date: Tue, 9 Dec 2025 02:06:33 -0600
Subject: [PATCH] [IndVarSimplify] Add safety check for getTruncateExpr in
 genLoopLimit

getTruncateExpr may not always return a SCEVAddRecExpr when truncating
loop bounds. Add a check to verify the result type before casting, and
bail out of the transformation if the cast would be invalid.

This prevents potential crashes from invalid casts when dealing with
complex loop bounds.
---
 llvm/lib/Transforms/Scalar/IndVarSimplify.cpp |  76 +++++----
 .../IndVarSimplify/scev-update-loop-opt.ll    | 149 ++++++++++++++++++
 2 files changed, 192 insertions(+), 33 deletions(-)
 create mode 100644 llvm/test/Transforms/IndVarSimplify/scev-update-loop-opt.ll

diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
index f377c192371ea..c444fda8c1002 100644
--- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -106,25 +106,25 @@ static cl::opt<ReplaceExitVal> ReplaceExitValue(
                    "always replace exit value whenever possible")));
 
 static cl::opt<bool> UsePostIncrementRanges(
-  "indvars-post-increment-ranges", cl::Hidden,
-  cl::desc("Use post increment control-dependent ranges in IndVarSimplify"),
-  cl::init(true));
+    "indvars-post-increment-ranges", cl::Hidden,
+    cl::desc("Use post increment control-dependent ranges in IndVarSimplify"),
+    cl::init(true));
 
 static cl::opt<bool>
-DisableLFTR("disable-lftr", cl::Hidden, cl::init(false),
-            cl::desc("Disable Linear Function Test Replace optimization"));
+    DisableLFTR("disable-lftr", cl::Hidden, cl::init(false),
+                cl::desc("Disable Linear Function Test Replace optimization"));
 
 static cl::opt<bool>
-LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true),
-                cl::desc("Predicate conditions in read only loops"));
+    LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true),
+                    cl::desc("Predicate conditions in read only loops"));
 
 static cl::opt<bool> LoopPredicationTraps(
     "indvars-predicate-loop-traps", cl::Hidden, cl::init(true),
     cl::desc("Predicate conditions that trap in loops with only local writes"));
 
 static cl::opt<bool>
-AllowIVWidening("indvars-widen-indvars", cl::Hidden, cl::init(true),
-                cl::desc("Allow widening of indvars to eliminate s/zext"));
+    AllowIVWidening("indvars-widen-indvars", cl::Hidden, cl::init(true),
+                    cl::desc("Allow widening of indvars to eliminate s/zext"));
 
 namespace {
 
@@ -645,8 +645,8 @@ static void visitIVCast(CastInst *Cast, WideIVInfo &WI,
   // induction variable when necessary.
   if (TTI &&
       TTI->getArithmeticInstrCost(Instruction::Add, Ty) >
-          TTI->getArithmeticInstrCost(Instruction::Add,
-                                      Cast->getOperand(0)->getType())) {
+                 TTI->getArithmeticInstrCost(Instruction::Add,
+                                             Cast->getOperand(0)->getType())) {
     return;
   }
 
@@ -685,7 +685,7 @@ class IndVarSimplifyVisitor : public IVVisitor {
   IndVarSimplifyVisitor(PHINode *IV, ScalarEvolution *SCEV,
                         const TargetTransformInfo *TTI,
                         const DominatorTree *DTree)
-    : SE(SCEV), TTI(TTI), IVPhi(IV) {
+      : SE(SCEV), TTI(TTI), IVPhi(IV) {
     DT = DTree;
     WI.NarrowIV = IVPhi;
   }
@@ -1031,10 +1031,17 @@ static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
   // exit count add(zext(add)) expression.
   if (IndVar->getType()->isIntegerTy() &&
       SE->getTypeSizeInBits(AR->getType()) >
-      SE->getTypeSizeInBits(ExitCount->getType())) {
+          SE->getTypeSizeInBits(ExitCount->getType())) {
     const SCEV *IVInit = AR->getStart();
-    if (!isa<SCEVConstant>(IVInit) || !isa<SCEVConstant>(ExitCount))
-      AR = cast<SCEVAddRecExpr>(SE->getTruncateExpr(AR, ExitCount->getType()));
+    if (!isa<SCEVConstant>(IVInit) || !isa<SCEVConstant>(ExitCount)) {
+      const SCEV *TruncExpr = SE->getTruncateExpr(AR, ExitCount->getType());
+
+      // The following bailout is necessary due to the interaction with
+      // the depth limit in SCEV analysis.
+      if (!isa<SCEVAddRecExpr>(TruncExpr))
+        return nullptr;
+      AR = cast<SCEVAddRecExpr>(TruncExpr);
+    }
   }
 
   const SCEVAddRecExpr *ARBase = UsePostInc ? AR->getPostIncExpr(*SE) : AR;
@@ -1052,12 +1059,12 @@ static Value *genLoopLimit(PHINode *IndVar, BasicBlock *ExitingBB,
 /// broader range than just linear tests.
 bool IndVarSimplify::
 linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
-                          const SCEV *ExitCount,
+                                               const SCEV *ExitCount,
                           PHINode *IndVar, SCEVExpander &Rewriter) {
   assert(L->getLoopLatch() && "Loop no longer in simplified form?");
   assert(isLoopCounter(IndVar, L, SE));
   Instruction * const IncVar =
-    cast<Instruction>(IndVar->getIncomingValueForBlock(L->getLoopLatch()));
+      cast<Instruction>(IndVar->getIncomingValueForBlock(L->getLoopLatch()));
 
   // Initialize CmpIndVar to the preincremented IV.
   Value *CmpIndVar = IndVar;
@@ -1081,6 +1088,15 @@ linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
     }
   }
 
+  Value *ExitCnt =
+      genLoopLimit(IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE);
+  if (!ExitCnt)
+    return false;
+
+  assert(ExitCnt->getType()->isPointerTy() ==
+             IndVar->getType()->isPointerTy() &&
+         "genLoopLimit missed a cast");
+
   // It may be necessary to drop nowrap flags on the incrementing instruction
   // if either LFTR moves from a pre-inc check to a post-inc check (in which
   // case the increment might have previously been poison on the last iteration
@@ -1101,12 +1117,6 @@ linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
       BO->setHasNoSignedWrap(AR->hasNoSignedWrap());
   }
 
-  Value *ExitCnt = genLoopLimit(
-      IndVar, ExitingBB, ExitCount, UsePostInc, L, Rewriter, SE);
-  assert(ExitCnt->getType()->isPointerTy() ==
-             IndVar->getType()->isPointerTy() &&
-         "genLoopLimit missed a cast");
-
   // Insert a new icmp_ne or icmp_eq instruction before the branch.
   BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
   ICmpInst::Predicate P;
@@ -1142,7 +1152,7 @@ linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
     const SCEV *IV = SE->getSCEV(CmpIndVar);
     const SCEV *TruncatedIV = SE->getTruncateExpr(IV, ExitCnt->getType());
     const SCEV *ZExtTrunc =
-      SE->getZeroExtendExpr(TruncatedIV, CmpIndVar->getType());
+        SE->getZeroExtendExpr(TruncatedIV, CmpIndVar->getType());
 
     if (ZExtTrunc == IV) {
       Extended = true;
@@ -1150,7 +1160,7 @@ linearFunctionTestReplace(Loop *L, BasicBlock *ExitingBB,
                                    "wide.trip.count");
     } else {
       const SCEV *SExtTrunc =
-        SE->getSignExtendExpr(TruncatedIV, CmpIndVar->getType());
+          SE->getSignExtendExpr(TruncatedIV, CmpIndVar->getType());
       if (SExtTrunc == IV) {
         Extended = true;
         ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(),
@@ -1693,16 +1703,16 @@ bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
   // all exits must dominate the latch, so there is a total dominance order
   // between them.
   llvm::sort(ExitingBlocks, [&](BasicBlock *A, BasicBlock *B) {
-               // std::sort sorts in ascending order, so we want the inverse of
-               // the normal dominance relation.
+    // std::sort sorts in ascending order, so we want the inverse of
+    // the normal dominance relation.
                if (A == B) return false;
-               if (DT->properlyDominates(A, B))
-                 return true;
-               else {
+    if (DT->properlyDominates(A, B))
+      return true;
+    else {
                  assert(DT->properlyDominates(B, A) &&
                         "expected total dominance order!");
-                 return false;
-               }
+      return false;
+    }
   });
 #ifdef ASSERT
   for (unsigned i = 1; i < ExitingBlocks.size(); i++) {
@@ -1881,7 +1891,7 @@ bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
     // within the loop which contains them.  This assumes trivially lcssa phis
     // have already been removed; TODO: generalize
     BasicBlock *ExitBlock =
-    BI->getSuccessor(L->contains(BI->getSuccessor(0)) ? 1 : 0);
+        BI->getSuccessor(L->contains(BI->getSuccessor(0)) ? 1 : 0);
     if (!ExitBlock->phis().empty())
       return true;
 
diff --git a/llvm/test/Transforms/IndVarSimplify/scev-update-loop-opt.ll b/llvm/test/Transforms/IndVarSimplify/scev-update-loop-opt.ll
new file mode 100644
index 0000000000000..f716796745fb7
--- /dev/null
+++ b/llvm/test/Transforms/IndVarSimplify/scev-update-loop-opt.ll
@@ -0,0 +1,149 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt %s -passes="loop(loop-idiom,indvars,loop-deletion,loop-unroll-full)" -S | FileCheck %s
+; REQUIRES: asserts
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+
+define void @loop_limit_test(i32 %conv5, i1 %cmp13, i1 %cmp20, i1 %cmp27, i1 %cmp34, i1 %cmp41) local_unnamed_addr {
+; CHECK-LABEL: define void @loop_limit_test(
+; CHECK-SAME: i32 [[CONV5:%.*]], i1 [[CMP13:%.*]], i1 [[CMP20:%.*]], i1 [[CMP27:%.*]], i1 [[CMP34:%.*]], i1 [[CMP41:%.*]]) local_unnamed_addr {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[CONV5]], 1
+; CHECK-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
+; CHECK-NEXT:    [[TMP2:%.*]] = zext i32 [[CONV5]] to i64
+; CHECK-NEXT:    br label %[[FOR_COND:.*]]
+; CHECK:       [[FOR_COND_LOOPEXIT:.*]]:
+; CHECK-NEXT:    br label %[[FOR_COND]]
+; CHECK:       [[FOR_COND]]:
+; CHECK-NEXT:    br label %[[FOR_COND2:.*]]
+; CHECK:       [[FOR_COND2]]:
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_COND_CLEANUP14:.*]] ], [ 0, %[[FOR_COND]] ]
+; CHECK-NEXT:    [[CMP6:%.*]] = icmp samesign ult i64 [[INDVARS_IV]], [[TMP2]]
+; CHECK-NEXT:    br i1 [[CMP6]], label %[[FOR_COND9_PREHEADER:.*]], label %[[FOR_COND_LOOPEXIT]]
+; CHECK:       [[FOR_COND9_PREHEADER]]:
+; CHECK-NEXT:    br label %[[FOR_COND9:.*]]
+; CHECK:       [[FOR_COND9_LOOPEXIT:.*]]:
+; CHECK-NEXT:    br label %[[FOR_COND9]]
+; CHECK:       [[FOR_COND9]]:
+; CHECK-NEXT:    br i1 [[CMP13]], label %[[FOR_COND16_PREHEADER:.*]], label %[[FOR_COND_CLEANUP14]]
+; CHECK:       [[FOR_COND16_PREHEADER]]:
+; CHECK-NEXT:    br label %[[FOR_COND16:.*]]
+; CHECK:       [[FOR_COND_CLEANUP14]]:
+; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:    br label %[[FOR_COND2]]
+; CHECK:       [[FOR_COND16_LOOPEXIT:.*]]:
+; CHECK-NEXT:    br label %[[FOR_COND16]]
+; CHECK:       [[FOR_COND16]]:
+; CHECK-NEXT:    br i1 [[CMP20]], label %[[FOR_COND23_PREHEADER:.*]], label %[[FOR_COND9_LOOPEXIT]]
+; CHECK:       [[FOR_COND23_PREHEADER]]:
+; CHECK-NEXT:    br label %[[FOR_COND23:.*]]
+; CHECK:       [[FOR_COND23_LOOPEXIT:.*]]:
+; CHECK-NEXT:    br label %[[FOR_COND23]]
+; CHECK:       [[FOR_COND23]]:
+; CHECK-NEXT:    br i1 [[CMP27]], label %[[FOR_COND30_PREHEADER:.*]], label %[[FOR_COND16_LOOPEXIT]]
+; CHECK:       [[FOR_COND30_PREHEADER]]:
+; CHECK-NEXT:    br label %[[FOR_COND30:.*]]
+; CHECK:       [[FOR_COND30_LOOPEXIT_LOOPEXIT:.*]]:
+; CHECK-NEXT:    br label %[[FOR_COND30_LOOPEXIT:.*]]
+; CHECK:       [[FOR_COND30_LOOPEXIT]]:
+; CHECK-NEXT:    br label %[[FOR_COND30]]
+; CHECK:       [[FOR_COND30]]:
+; CHECK-NEXT:    br i1 [[CMP34]], label %[[FOR_COND37_PREHEADER:.*]], label %[[FOR_COND23_LOOPEXIT]]
+; CHECK:       [[FOR_COND37_PREHEADER]]:
+; CHECK-NEXT:    br label %[[FOR_COND37_PEEL_BEGIN:.*]]
+; CHECK:       [[FOR_COND37_PEEL_BEGIN]]:
+; CHECK-NEXT:    br label %[[FOR_COND37_PEEL:.*]]
+; CHECK:       [[FOR_COND37_PEEL]]:
+; CHECK-NEXT:    br i1 [[CMP41]], label %[[FOR_BODY43_PEEL:.*]], label %[[FOR_COND30_LOOPEXIT]]
+; CHECK:       [[FOR_BODY43_PEEL]]:
+; CHECK-NEXT:    [[CONV45_PEEL:%.*]] = zext i32 0 to i64
+; CHECK-NEXT:    [[CALL31_I_I_PEEL:%.*]] = load volatile i64, ptr null, align 8
+; CHECK-NEXT:    [[MUL79_I_I_PEEL:%.*]] = mul i64 [[CALL31_I_I_PEEL]], [[INDVARS_IV]]
+; CHECK-NEXT:    [[DOTIDX1_PEEL:%.*]] = add i64 [[CONV45_PEEL]], [[MUL79_I_I_PEEL]]
+; CHECK-NEXT:    [[SUB_PTR_LHS_CAST_PEEL:%.*]] = shl i64 [[DOTIDX1_PEEL]], 2
+; CHECK-NEXT:    [[SUB_PTR_DIV_PEEL:%.*]] = ashr exact i64 [[SUB_PTR_LHS_CAST_PEEL]], 1
+; CHECK-NEXT:    [[CMP55_PEEL:%.*]] = icmp sgt i64 0, 0
+; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP55_PEEL]])
+; CHECK-NEXT:    br label %[[FOR_COND37_PEEL_NEXT:.*]]
+; CHECK:       [[FOR_COND37_PEEL_NEXT]]:
+; CHECK-NEXT:    br label %[[FOR_COND37_PEEL_NEXT1:.*]]
+; CHECK:       [[FOR_COND37_PEEL_NEXT1]]:
+; CHECK-NEXT:    br label %[[FOR_COND37_PREHEADER_PEEL_NEWPH:.*]]
+; CHECK:       [[FOR_COND37_PREHEADER_PEEL_NEWPH]]:
+; CHECK-NEXT:    br label %[[FOR_COND37:.*]]
+; CHECK:       [[FOR_COND37]]:
+; CHECK-NEXT:    [[OFFSET_619:%.*]] = phi i64 [ [[SUB_PTR_DIV:%.*]], %[[FOR_BODY43:.*]] ], [ [[SUB_PTR_DIV_PEEL]], %[[FOR_COND37_PREHEADER_PEEL_NEWPH]] ]
+; CHECK-NEXT:    br i1 [[CMP41]], label %[[FOR_BODY43]], label %[[FOR_COND30_LOOPEXIT_LOOPEXIT]]
+; CHECK:       [[FOR_BODY43]]:
+; CHECK-NEXT:    [[CALL31_I_I:%.*]] = load volatile i64, ptr null, align 8
+; CHECK-NEXT:    [[ADD33_I_I:%.*]] = add i64 [[INDVARS_IV]], [[CALL31_I_I]]
+; CHECK-NEXT:    [[MUL42_I_I:%.*]] = mul i64 [[TMP1]], [[ADD33_I_I]]
+; CHECK-NEXT:    [[ADD43_I_I:%.*]] = add i64 [[MUL42_I_I]], 1
+; CHECK-NEXT:    [[MUL52_I_I:%.*]] = mul i64 [[TMP1]], [[ADD43_I_I]]
+; CHECK-NEXT:    [[ADD53_I_I:%.*]] = add i64 [[MUL52_I_I]], 1
+; CHECK-NEXT:    [[MUL62_I_I:%.*]] = mul i64 [[TMP1]], [[ADD53_I_I]]
+; CHECK-NEXT:    [[ADD63_I_I:%.*]] = add i64 [[MUL62_I_I]], 1
+; CHECK-NEXT:    [[MUL72_I_I:%.*]] = mul i64 [[INDVARS_IV]], [[ADD63_I_I]]
+; CHECK-NEXT:    [[MUL79_I_I:%.*]] = mul i64 [[CALL31_I_I]], [[MUL72_I_I]]
+; CHECK-NEXT:    [[DOTIDX1:%.*]] = add i64 [[TMP1]], [[MUL79_I_I]]
+; CHECK-NEXT:    [[SUB_PTR_LHS_CAST:%.*]] = shl i64 [[DOTIDX1]], 2
+; CHECK-NEXT:    [[SUB_PTR_DIV]] = ashr exact i64 [[SUB_PTR_LHS_CAST]], 1
+; CHECK-NEXT:    [[CMP55:%.*]] = icmp sgt i64 [[OFFSET_619]], 0
+; CHECK-NEXT:    call void @llvm.assume(i1 [[CMP55]])
+; CHECK-NEXT:    br label %[[FOR_COND37]], !llvm.loop [[LOOP0:![0-9]+]]
+;
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.cond2, %entry
+  br label %for.cond2
+
+for.cond2:                                        ; preds = %for.cond.cleanup14, %for.cond
+  %i5.0 = phi i32 [ 0, %for.cond ], [ %inc70, %for.cond.cleanup14 ]
+  %cmp6 = icmp ult i32 %i5.0, %conv5
+  br i1 %cmp6, label %for.cond9, label %for.cond
+
+for.cond9:                                        ; preds = %for.cond16, %for.cond2
+  br i1 %cmp13, label %for.cond16, label %for.cond.cleanup14
+
+for.cond.cleanup14:                               ; preds = %for.cond9
+  %inc70 = add i32 %i5.0, 1
+  br label %for.cond2
+
+for.cond16:                                       ; preds = %for.cond23, %for.cond9
+  br i1 %cmp20, label %for.cond23, label %for.cond9
+
+for.cond23:                                       ; preds = %for.cond30, %for.cond16
+  br i1 %cmp27, label %for.cond30, label %for.cond16
+
+for.cond30:                                       ; preds = %for.cond37, %for.cond23
+  br i1 %cmp34, label %for.cond37, label %for.cond23
+
+for.cond37:                                       ; preds = %for.body43, %for.cond30
+  %i0.018 = phi i32 [ %inc, %for.body43 ], [ 0, %for.cond30 ]
+  %offset.619 = phi i64 [ %sub.ptr.div, %for.body43 ], [ 0, %for.cond30 ]
+  br i1 %cmp41, label %for.body43, label %for.cond30
+
+for.body43:                                       ; preds = %for.cond37
+  %conv45 = zext i32 %i0.018 to i64
+  %conv50 = zext i32 %i5.0 to i64
+  %call31.i.i = load volatile i64, ptr null, align 8
+  %add33.i.i = add i64 %conv50, %call31.i.i
+  %mul42.i.i = mul i64 %conv45, %add33.i.i
+  %add43.i.i = add i64 %mul42.i.i, 1
+  %mul52.i.i = mul i64 %conv45, %add43.i.i
+  %add53.i.i = add i64 %mul52.i.i, 1
+  %mul62.i.i = mul i64 %conv45, %add53.i.i
+  %add63.i.i = add i64 %mul62.i.i, 1
+  %mul72.i.i = mul i64 %conv50, %add63.i.i
+  %mul79.i.i = mul i64 %call31.i.i, %mul72.i.i
+  %.idx1 = add i64 %conv45, %mul79.i.i
+  %sub.ptr.lhs.cast = shl i64 %.idx1, 2
+  %sub.ptr.div = ashr exact i64 %sub.ptr.lhs.cast, 1
+  %cmp55 = icmp sgt i64 %offset.619, 0
+  call void @llvm.assume(i1 %cmp55)
+  %inc = add i32 %conv5, 1
+  br label %for.cond37
+}
+
+declare void @llvm.assume(i1 noundef)