[llvm] [IndVarSimplify] Allow predicateLoopExit on some loops with local writes (PR #155901)

Florian Mayer via llvm-commits llvm-commits at lists.llvm.org
Thu Aug 28 14:43:08 PDT 2025


https://github.com/fmayer updated https://github.com/llvm/llvm-project/pull/155901

>From 70490db22f12308a8e9086235d74f99a5c964f8c Mon Sep 17 00:00:00 2001
From: Florian Mayer <fmayer at google.com>
Date: Thu, 28 Aug 2025 11:12:57 -0700
Subject: [PATCH 1/8] [𝘀𝗽𝗿] initial version
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Created using spr 1.3.4
---
 llvm/lib/Transforms/Scalar/IndVarSimplify.cpp |  41 ++-
 .../ScalarEvolution/unreachable-exit.ll       | 291 ++++++++++++++++++
 2 files changed, 330 insertions(+), 2 deletions(-)
 create mode 100644 llvm/test/Analysis/ScalarEvolution/unreachable-exit.ll

diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
index c32731185afd0..ddec84698cb35 100644
--- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -53,6 +53,7 @@
 #include "llvm/IR/InstrTypes.h"
 #include "llvm/IR/Instruction.h"
 #include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/PassManager.h"
 #include "llvm/IR/PatternMatch.h"
@@ -1816,11 +1817,20 @@ bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
   // suggestions on how to improve this?  I can obviously bail out for outer
   // loops, but that seems less than ideal.  MemorySSA can find memory writes,
   // is that enough for *all* side effects?
+  bool HasLocalSideEffects = false;
   for (BasicBlock *BB : L->blocks())
     for (auto &I : *BB)
       // TODO:isGuaranteedToTransfer
-      if (I.mayHaveSideEffects())
-        return false;
+      if (I.mayHaveSideEffects()) {
+        HasLocalSideEffects = true;
+        if (StoreInst *SI = dyn_cast<StoreInst>(&I)) {
+          if (findAllocaForValue(SI->getPointerOperand(), false) == nullptr) {
+            return false;
+          }
+        } else {
+          return false;
+        }
+      }
 
   bool Changed = false;
   // Finally, do the actual predication for all predicatable blocks.  A couple
@@ -1840,6 +1850,33 @@ bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
     const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
 
     auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
+    if (HasLocalSideEffects) {
+      BasicBlock *Unreachable = nullptr;
+      BasicBlock *InLoop = nullptr;
+      for (BasicBlock *Succ : BI->successors()) {
+        if (isa<UnreachableInst>(Succ->getTerminator()))
+          Unreachable = Succ;
+        if (L->contains(Succ))
+          InLoop = Succ;
+      }
+      // Exit BBs which have one branch back into the loop and another one to
+      // a trap can still be optimized, because local side effects cannot
+      // be observed in the exit case (the trap).
+      if (Unreachable == nullptr || InLoop == nullptr)
+        continue;
+      if (llvm::any_of(*Unreachable, [](Instruction &I) {
+            if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
+              if (II->getIntrinsicID() != Intrinsic::trap &&
+                  II->getIntrinsicID() != Intrinsic::ubsantrap)
+                return true;
+            } else if (!isa<UnreachableInst>(I)) {
+              return true;
+            }
+            return false;
+          })) {
+        continue;
+      }
+    }
     Value *NewCond;
     if (ExitCount == ExactBTC) {
       NewCond = L->contains(BI->getSuccessor(0)) ?
diff --git a/llvm/test/Analysis/ScalarEvolution/unreachable-exit.ll b/llvm/test/Analysis/ScalarEvolution/unreachable-exit.ll
new file mode 100644
index 0000000000000..eaf6ad9f0d4f7
--- /dev/null
+++ b/llvm/test/Analysis/ScalarEvolution/unreachable-exit.ll
@@ -0,0 +1,291 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -passes=indvars < %s | FileCheck %s
+
+source_filename = "/usr/local/google/home/fmayer/loop/src3.cc"
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: mustprogress uwtable
+define dso_local void @foo(i32 noundef %block_size) local_unnamed_addr #0 {
+; CHECK-LABEL: define dso_local void @foo(
+; CHECK-SAME: i32 noundef [[BLOCK_SIZE:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[FOO_ARR:%.*]] = alloca [1024 x i8], align 16
+; CHECK-NEXT:    [[BAR_ARR:%.*]] = alloca [1025 x i8], align 16
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0(ptr nonnull [[FOO_ARR]]) #[[ATTR6:[0-9]+]]
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0(ptr nonnull [[BAR_ARR]]) #[[ATTR6]]
+; CHECK-NEXT:    call void @x(ptr noundef nonnull [[FOO_ARR]])
+; CHECK-NEXT:    [[CMP14_NOT:%.*]] = icmp eq i32 [[BLOCK_SIZE]], 0
+; CHECK-NEXT:    br i1 [[CMP14_NOT]], label %[[FOR_COND_CLEANUP:.*]], label %[[FOR_BODY_PREHEADER:.*]]
+; CHECK:       [[FOR_BODY_PREHEADER]]:
+; CHECK-NEXT:    [[TMP0:%.*]] = zext i32 [[BLOCK_SIZE]] to i64
+; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[BLOCK_SIZE]], -1
+; CHECK-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP1]], i32 1024)
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i32 1024, [[UMIN]]
+; CHECK-NEXT:    br label %[[FOR_BODY:.*]]
+; CHECK:       [[FOR_COND_CLEANUP_LOOPEXIT:.*]]:
+; CHECK-NEXT:    br label %[[FOR_COND_CLEANUP]]
+; CHECK:       [[FOR_COND_CLEANUP]]:
+; CHECK-NEXT:    call void @x(ptr noundef nonnull [[BAR_ARR]])
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(ptr nonnull [[BAR_ARR]]) #[[ATTR6]]
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(ptr nonnull [[FOO_ARR]]) #[[ATTR6]]
+; CHECK-NEXT:    ret void
+; CHECK:       [[FOR_BODY]]:
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[IF_END4:.*]] ], [ 0, %[[FOR_BODY_PREHEADER]] ]
+; CHECK-NEXT:    br i1 [[TMP2]], label %[[IF_THEN:.*]], label %[[IF_END4]]
+; CHECK:       [[IF_THEN]]:
+; CHECK-NEXT:    call void @llvm.trap()
+; CHECK-NEXT:    unreachable
+; CHECK:       [[IF_END4]]:
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [1024 x i8], ptr [[FOO_ARR]], i64 0, i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[TMP3:%.*]] = load i8, ptr [[ARRAYIDX]], align 1, !tbaa [[TBAA5:![0-9]+]]
+; CHECK-NEXT:    [[TMP4:%.*]] = xor i8 [[TMP3]], 54
+; CHECK-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds nuw [1025 x i8], ptr [[BAR_ARR]], i64 0, i64 [[INDVARS_IV]]
+; CHECK-NEXT:    store i8 [[TMP4]], ptr [[ARRAYIDX7]], align 1, !tbaa [[TBAA5]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[TMP0]]
+; CHECK-NEXT:    br i1 [[EXITCOND]], label %[[FOR_BODY]], label %[[FOR_COND_CLEANUP_LOOPEXIT]], !llvm.loop [[LOOP8:![0-9]+]]
+;
+entry:
+  %foo_arr = alloca [1024 x i8], align 16
+  %bar_arr = alloca [1025 x i8], align 16
+  call void @llvm.lifetime.start.p0(ptr nonnull %foo_arr) #4
+  call void @llvm.lifetime.start.p0(ptr nonnull %bar_arr) #4
+  call void @x(ptr noundef nonnull %foo_arr)
+  %cmp14.not = icmp eq i32 %block_size, 0
+  br i1 %cmp14.not, label %for.cond.cleanup, label %for.body.preheader
+
+for.body.preheader:                               ; preds = %entry
+  br label %for.body
+
+for.cond.cleanup.loopexit:                        ; preds = %if.end4
+  br label %for.cond.cleanup
+
+for.cond.cleanup:                                 ; preds = %for.cond.cleanup.loopexit, %entry
+  call void @x(ptr noundef nonnull %bar_arr)
+  call void @llvm.lifetime.end.p0(ptr nonnull %bar_arr) #4
+  call void @llvm.lifetime.end.p0(ptr nonnull %foo_arr) #4
+  ret void
+
+for.body:                                         ; preds = %for.body.preheader, %if.end4
+  %i.015 = phi i32 [ %inc, %if.end4 ], [ 0, %for.body.preheader ]
+  %cmp1 = icmp samesign ugt i32 %i.015, 1023
+  br i1 %cmp1, label %if.then, label %if.end4
+
+if.then:                                          ; preds = %for.body
+  call void @llvm.trap()
+  unreachable
+
+if.end4:                                          ; preds = %for.body
+  %idxprom = zext nneg i32 %i.015 to i64
+  %arrayidx = getelementptr inbounds nuw [1024 x i8], ptr %foo_arr, i64 0, i64 %idxprom
+  %0 = load i8, ptr %arrayidx, align 1, !tbaa !5
+  %1 = xor i8 %0, 54
+  %arrayidx7 = getelementptr inbounds nuw [1025 x i8], ptr %bar_arr, i64 0, i64 %idxprom
+  store i8 %1, ptr %arrayidx7, align 1, !tbaa !5
+  %inc = add nuw nsw i32 %i.015, 1
+  %cmp = icmp ult i32 %inc, %block_size
+  br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit, !llvm.loop !8
+}
+
+define dso_local void @foo_ubsan(i32 noundef %block_size) local_unnamed_addr #0 {
+; CHECK-LABEL: define dso_local void @foo_ubsan(
+; CHECK-SAME: i32 noundef [[BLOCK_SIZE:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[FOO_ARR:%.*]] = alloca [1024 x i8], align 16
+; CHECK-NEXT:    [[BAR_ARR:%.*]] = alloca [1025 x i8], align 16
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0(ptr nonnull [[FOO_ARR]]) #[[ATTR6]]
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0(ptr nonnull [[BAR_ARR]]) #[[ATTR6]]
+; CHECK-NEXT:    call void @x(ptr noundef nonnull [[FOO_ARR]])
+; CHECK-NEXT:    [[CMP14_NOT:%.*]] = icmp eq i32 [[BLOCK_SIZE]], 0
+; CHECK-NEXT:    br i1 [[CMP14_NOT]], label %[[FOR_COND_CLEANUP:.*]], label %[[FOR_BODY_PREHEADER:.*]]
+; CHECK:       [[FOR_BODY_PREHEADER]]:
+; CHECK-NEXT:    [[TMP0:%.*]] = zext i32 [[BLOCK_SIZE]] to i64
+; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[BLOCK_SIZE]], -1
+; CHECK-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP1]], i32 1024)
+; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i32 1024, [[UMIN]]
+; CHECK-NEXT:    br label %[[FOR_BODY:.*]]
+; CHECK:       [[FOR_COND_CLEANUP_LOOPEXIT:.*]]:
+; CHECK-NEXT:    br label %[[FOR_COND_CLEANUP]]
+; CHECK:       [[FOR_COND_CLEANUP]]:
+; CHECK-NEXT:    call void @x(ptr noundef nonnull [[BAR_ARR]])
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(ptr nonnull [[BAR_ARR]]) #[[ATTR6]]
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(ptr nonnull [[FOO_ARR]]) #[[ATTR6]]
+; CHECK-NEXT:    ret void
+; CHECK:       [[FOR_BODY]]:
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[IF_END4:.*]] ], [ 0, %[[FOR_BODY_PREHEADER]] ]
+; CHECK-NEXT:    br i1 [[TMP2]], label %[[IF_THEN:.*]], label %[[IF_END4]]
+; CHECK:       [[IF_THEN]]:
+; CHECK-NEXT:    call void @llvm.ubsantrap(i8 1)
+; CHECK-NEXT:    unreachable
+; CHECK:       [[IF_END4]]:
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [1024 x i8], ptr [[FOO_ARR]], i64 0, i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[TMP3:%.*]] = load i8, ptr [[ARRAYIDX]], align 1, !tbaa [[TBAA5]]
+; CHECK-NEXT:    [[TMP4:%.*]] = xor i8 [[TMP3]], 54
+; CHECK-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds nuw [1025 x i8], ptr [[BAR_ARR]], i64 0, i64 [[INDVARS_IV]]
+; CHECK-NEXT:    store i8 [[TMP4]], ptr [[ARRAYIDX7]], align 1, !tbaa [[TBAA5]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[TMP0]]
+; CHECK-NEXT:    br i1 [[EXITCOND]], label %[[FOR_BODY]], label %[[FOR_COND_CLEANUP_LOOPEXIT]], !llvm.loop [[LOOP8]]
+;
+entry:
+  %foo_arr = alloca [1024 x i8], align 16
+  %bar_arr = alloca [1025 x i8], align 16
+  call void @llvm.lifetime.start.p0(ptr nonnull %foo_arr) #4
+  call void @llvm.lifetime.start.p0(ptr nonnull %bar_arr) #4
+  call void @x(ptr noundef nonnull %foo_arr)
+  %cmp14.not = icmp eq i32 %block_size, 0
+  br i1 %cmp14.not, label %for.cond.cleanup, label %for.body.preheader
+
+for.body.preheader:                               ; preds = %entry
+  br label %for.body
+
+for.cond.cleanup.loopexit:                        ; preds = %if.end4
+  br label %for.cond.cleanup
+
+for.cond.cleanup:                                 ; preds = %for.cond.cleanup.loopexit, %entry
+  call void @x(ptr noundef nonnull %bar_arr)
+  call void @llvm.lifetime.end.p0(ptr nonnull %bar_arr) #4
+  call void @llvm.lifetime.end.p0(ptr nonnull %foo_arr) #4
+  ret void
+
+for.body:                                         ; preds = %for.body.preheader, %if.end4
+  %i.015 = phi i32 [ %inc, %if.end4 ], [ 0, %for.body.preheader ]
+  %cmp1 = icmp samesign ugt i32 %i.015, 1023
+  br i1 %cmp1, label %if.then, label %if.end4
+
+if.then:                                          ; preds = %for.body
+  call void @llvm.ubsantrap(i8 1)
+  unreachable
+
+if.end4:                                          ; preds = %for.body
+  %idxprom = zext nneg i32 %i.015 to i64
+  %arrayidx = getelementptr inbounds nuw [1024 x i8], ptr %foo_arr, i64 0, i64 %idxprom
+  %0 = load i8, ptr %arrayidx, align 1, !tbaa !5
+  %1 = xor i8 %0, 54
+  %arrayidx7 = getelementptr inbounds nuw [1025 x i8], ptr %bar_arr, i64 0, i64 %idxprom
+  store i8 %1, ptr %arrayidx7, align 1, !tbaa !5
+  %inc = add nuw nsw i32 %i.015, 1
+  %cmp = icmp ult i32 %inc, %block_size
+  br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit, !llvm.loop !8
+}
+
+define dso_local void @foo_randomcall(i32 noundef %block_size) local_unnamed_addr #0 {
+; CHECK-LABEL: define dso_local void @foo_randomcall(
+; CHECK-SAME: i32 noundef [[BLOCK_SIZE:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[FOO_ARR:%.*]] = alloca [1024 x i8], align 16
+; CHECK-NEXT:    [[BAR_ARR:%.*]] = alloca [1025 x i8], align 16
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0(ptr nonnull [[FOO_ARR]]) #[[ATTR6]]
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0(ptr nonnull [[BAR_ARR]]) #[[ATTR6]]
+; CHECK-NEXT:    call void @x(ptr noundef nonnull [[FOO_ARR]])
+; CHECK-NEXT:    [[CMP14_NOT:%.*]] = icmp eq i32 [[BLOCK_SIZE]], 0
+; CHECK-NEXT:    br i1 [[CMP14_NOT]], label %[[FOR_COND_CLEANUP:.*]], label %[[FOR_BODY_PREHEADER:.*]]
+; CHECK:       [[FOR_BODY_PREHEADER]]:
+; CHECK-NEXT:    [[TMP0:%.*]] = zext i32 [[BLOCK_SIZE]] to i64
+; CHECK-NEXT:    br label %[[FOR_BODY:.*]]
+; CHECK:       [[FOR_COND_CLEANUP_LOOPEXIT:.*]]:
+; CHECK-NEXT:    br label %[[FOR_COND_CLEANUP]]
+; CHECK:       [[FOR_COND_CLEANUP]]:
+; CHECK-NEXT:    call void @x(ptr noundef nonnull [[BAR_ARR]])
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(ptr nonnull [[BAR_ARR]]) #[[ATTR6]]
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(ptr nonnull [[FOO_ARR]]) #[[ATTR6]]
+; CHECK-NEXT:    ret void
+; CHECK:       [[FOR_BODY]]:
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[IF_END4:.*]] ], [ 0, %[[FOR_BODY_PREHEADER]] ]
+; CHECK-NEXT:    [[EXITCOND1:%.*]] = icmp eq i64 [[INDVARS_IV]], 1024
+; CHECK-NEXT:    br i1 [[EXITCOND1]], label %[[IF_THEN:.*]], label %[[IF_END4]]
+; CHECK:       [[IF_THEN]]:
+; CHECK-NEXT:    call void @noreturn(ptr [[FOO_ARR]])
+; CHECK-NEXT:    unreachable
+; CHECK:       [[IF_END4]]:
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [1024 x i8], ptr [[FOO_ARR]], i64 0, i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[TMP3:%.*]] = load i8, ptr [[ARRAYIDX]], align 1, !tbaa [[TBAA5]]
+; CHECK-NEXT:    [[TMP4:%.*]] = xor i8 [[TMP3]], 54
+; CHECK-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds nuw [1025 x i8], ptr [[BAR_ARR]], i64 0, i64 [[INDVARS_IV]]
+; CHECK-NEXT:    store i8 [[TMP4]], ptr [[ARRAYIDX7]], align 1, !tbaa [[TBAA5]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[TMP0]]
+; CHECK-NEXT:    br i1 [[EXITCOND]], label %[[FOR_BODY]], label %[[FOR_COND_CLEANUP_LOOPEXIT]], !llvm.loop [[LOOP8]]
+;
+entry:
+  %foo_arr = alloca [1024 x i8], align 16
+  %bar_arr = alloca [1025 x i8], align 16
+  call void @llvm.lifetime.start.p0(ptr nonnull %foo_arr) #4
+  call void @llvm.lifetime.start.p0(ptr nonnull %bar_arr) #4
+  call void @x(ptr noundef nonnull %foo_arr)
+  %cmp14.not = icmp eq i32 %block_size, 0
+  br i1 %cmp14.not, label %for.cond.cleanup, label %for.body.preheader
+
+for.body.preheader:                               ; preds = %entry
+  br label %for.body
+
+for.cond.cleanup.loopexit:                        ; preds = %if.end4
+  br label %for.cond.cleanup
+
+for.cond.cleanup:                                 ; preds = %for.cond.cleanup.loopexit, %entry
+  call void @x(ptr noundef nonnull %bar_arr)
+  call void @llvm.lifetime.end.p0(ptr nonnull %bar_arr) #4
+  call void @llvm.lifetime.end.p0(ptr nonnull %foo_arr) #4
+  ret void
+
+for.body:                                         ; preds = %for.body.preheader, %if.end4
+  %i.015 = phi i32 [ %inc, %if.end4 ], [ 0, %for.body.preheader ]
+  %cmp1 = icmp samesign ugt i32 %i.015, 1023
+  br i1 %cmp1, label %if.then, label %if.end4
+
+if.then:                                          ; preds = %for.body
+  call void @noreturn(ptr %foo_arr)
+  unreachable
+
+if.end4:                                          ; preds = %for.body
+  %idxprom = zext nneg i32 %i.015 to i64
+  %arrayidx = getelementptr inbounds nuw [1024 x i8], ptr %foo_arr, i64 0, i64 %idxprom
+  %0 = load i8, ptr %arrayidx, align 1, !tbaa !5
+  %1 = xor i8 %0, 54
+  %arrayidx7 = getelementptr inbounds nuw [1025 x i8], ptr %bar_arr, i64 0, i64 %idxprom
+  store i8 %1, ptr %arrayidx7, align 1, !tbaa !5
+  %inc = add nuw nsw i32 %i.015, 1
+  %cmp = icmp ult i32 %inc, %block_size
+  br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit, !llvm.loop !8
+}
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
+declare void @llvm.lifetime.start.p0(ptr captures(none)) #1
+
+declare void @x(ptr noundef) local_unnamed_addr #2
+
+; Function Attrs: cold noreturn nounwind memory(inaccessiblemem: write)
+declare void @llvm.trap() #3
+
+declare void @noreturn(ptr) #3
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
+declare void @llvm.lifetime.end.p0(ptr captures(none)) #1
+
+attributes #0 = { mustprogress uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+attributes #1 = { mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
+attributes #2 = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+attributes #3 = { cold noreturn nounwind memory(inaccessiblemem: write) }
+attributes #4 = { nounwind }
+
+!llvm.module.flags = !{!0, !1, !2, !3}
+!llvm.ident = !{!4}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 8, !"PIC Level", i32 2}
+!2 = !{i32 7, !"PIE Level", i32 2}
+!3 = !{i32 7, !"uwtable", i32 2}
+!4 = !{!"clang version 22.0.0git (/usr/local/google/home/fmayer/large/llvm-project/clang 9bebffa6127aff780f20cc05694c31230580d4ff)"}
+!5 = !{!6, !6, i64 0}
+!6 = !{!"omnipotent char", !7, i64 0}
+!7 = !{!"Simple C++ TBAA"}
+!8 = distinct !{!8, !9}
+!9 = !{!"llvm.loop.mustprogress"}
+;.
+; CHECK: [[TBAA5]] = !{[[META6:![0-9]+]], [[META6]], i64 0}
+; CHECK: [[META6]] = !{!"omnipotent char", [[META7:![0-9]+]], i64 0}
+; CHECK: [[META7]] = !{!"Simple C++ TBAA"}
+; CHECK: [[LOOP8]] = distinct !{[[LOOP8]], [[META9:![0-9]+]]}
+; CHECK: [[META9]] = !{!"llvm.loop.mustprogress"}
+;.
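
For reference, a rough C++ sketch of the loop shape these tests model,
reconstructed from the IR above (the callee `x` comes from the test;
everything else is an illustrative assumption, not the actual src3.cc):

  void x(char *);  // opaque helper, declaration assumed

  void foo(unsigned block_size) {
    char foo_arr[1024];   // locals, only ever written through their allocas
    char bar_arr[1025];
    x(foo_arr);
    for (unsigned i = 0; i < block_size; ++i) {
      if (i > 1023)
        __builtin_trap();            // the exit that ends in unreachable
      bar_arr[i] = foo_arr[i] ^ 54;  // store to a local alloca only
    }
    x(bar_arr);
  }

Since the only side effects in the loop are stores to local allocas and the
early exit goes straight to a trap, the patch's reasoning is that those
stores cannot be observed on the trapping path, so the exit can still be
predicated.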

>From 78851f9355ff76704769c42e4b340d2ec45ef7fe Mon Sep 17 00:00:00 2001
From: Florian Mayer <fmayer at google.com>
Date: Thu, 28 Aug 2025 11:17:49 -0700
Subject: [PATCH 2/8] comment

Created using spr 1.3.4
---
 llvm/lib/Transforms/Scalar/IndVarSimplify.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
index ddec84698cb35..4a3bcd95050c5 100644
--- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -1861,7 +1861,8 @@ bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
       }
       // Exit BBs which have one branch back into the loop and another one to
       // a trap can still be optimized, because local side effects cannot
-      // be observed in the exit case (the trap).
+      // be observed in the exit case (the trap). We could be smarter about
+      // this, but for now let's pattern match common cases that directly trap.
       if (Unreachable == nullptr || InLoop == nullptr)
         continue;
       if (llvm::any_of(*Unreachable, [](Instruction &I) {

>From 5d1e1abb2b413e8917f01143ebe697e3b671275c Mon Sep 17 00:00:00 2001
From: Florian Mayer <fmayer at google.com>
Date: Thu, 28 Aug 2025 11:21:46 -0700
Subject: [PATCH 3/8] add flag

Created using spr 1.3.4
---
 llvm/lib/Transforms/Scalar/IndVarSimplify.cpp | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
index 4a3bcd95050c5..4a2d30fb882d3 100644
--- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -118,6 +118,10 @@ static cl::opt<bool>
 LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true),
                 cl::desc("Predicate conditions in read only loops"));
 
+static cl::opt<bool> LoopPredicationTraps(
+    "indvars-predicate-loop-traps", cl::Hidden, cl::init(true),
+    cl::desc("Predicate conditions that trap in loops with only local writes"));
+
 static cl::opt<bool>
 AllowIVWidening("indvars-widen-indvars", cl::Hidden, cl::init(true),
                 cl::desc("Allow widening of indvars to eliminate s/zext"));
@@ -1822,11 +1826,12 @@ bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
     for (auto &I : *BB)
       // TODO:isGuaranteedToTransfer
       if (I.mayHaveSideEffects()) {
+        if (!LoopPredicationTraps)
+          return false;
         HasLocalSideEffects = true;
         if (StoreInst *SI = dyn_cast<StoreInst>(&I)) {
-          if (findAllocaForValue(SI->getPointerOperand(), false) == nullptr) {
+          if (findAllocaForValue(SI->getPointerOperand(), false) == nullptr)
             return false;
-          }
         } else {
           return false;
         }
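
Usage note (an assumption based on the usual cl::opt handling, mirroring the
existing -indvars-predicate-loops flag): the new behavior can be toggled off
for triage with something like

  opt -passes=indvars -indvars-predicate-loop-traps=false -S input.ll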

>From f896688ac0f376e5930b490b32a599ba1a2bfbf8 Mon Sep 17 00:00:00 2001
From: Florian Mayer <fmayer at google.com>
Date: Thu, 28 Aug 2025 13:28:08 -0700
Subject: [PATCH 4/8] atomic and volatile

Created using spr 1.3.4
---
 llvm/lib/Transforms/Scalar/IndVarSimplify.cpp |   9 +-
 .../ScalarEvolution/unreachable-exit.ll       | 162 ++++++++++++++++++
 2 files changed, 170 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
index 4a2d30fb882d3..91e06f10fbe4c 100644
--- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -1830,7 +1830,14 @@ bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
           return false;
         HasLocalSideEffects = true;
         if (StoreInst *SI = dyn_cast<StoreInst>(&I)) {
-          if (findAllocaForValue(SI->getPointerOperand(), false) == nullptr)
+          // The local could have leaked out of the function, so we need to
+          // consider atomic operations as effects.
+          // Because we need to preserve the relative order of volatile
+          // accesses, turn off this optimization if we see any of them.
+          // We could be smarter about volatile, and check whether the
+          // reordering is valid.
+          if (SI->isAtomic() || SI->isVolatile() ||
+              findAllocaForValue(SI->getPointerOperand(), false) == nullptr)
             return false;
         } else {
           return false;
diff --git a/llvm/test/Analysis/ScalarEvolution/unreachable-exit.ll b/llvm/test/Analysis/ScalarEvolution/unreachable-exit.ll
index eaf6ad9f0d4f7..4a035ce1a63a5 100644
--- a/llvm/test/Analysis/ScalarEvolution/unreachable-exit.ll
+++ b/llvm/test/Analysis/ScalarEvolution/unreachable-exit.ll
@@ -88,6 +88,168 @@ if.end4:                                          ; preds = %for.body
   br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit, !llvm.loop !8
 }
 
+; Function Attrs: mustprogress uwtable
+define dso_local void @foo_atomic(i32 noundef %block_size) local_unnamed_addr #0 {
+; CHECK-LABEL: define dso_local void @foo_atomic(
+; CHECK-SAME: i32 noundef [[BLOCK_SIZE:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[FOO_ARR:%.*]] = alloca [1024 x i8], align 16
+; CHECK-NEXT:    [[BAR_ARR:%.*]] = alloca [1025 x i8], align 16
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0(ptr nonnull [[FOO_ARR]]) #[[ATTR6]]
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0(ptr nonnull [[BAR_ARR]]) #[[ATTR6]]
+; CHECK-NEXT:    call void @x(ptr noundef nonnull [[FOO_ARR]])
+; CHECK-NEXT:    [[CMP14_NOT:%.*]] = icmp eq i32 [[BLOCK_SIZE]], 0
+; CHECK-NEXT:    br i1 [[CMP14_NOT]], label %[[FOR_COND_CLEANUP:.*]], label %[[FOR_BODY_PREHEADER:.*]]
+; CHECK:       [[FOR_BODY_PREHEADER]]:
+; CHECK-NEXT:    [[TMP0:%.*]] = zext i32 [[BLOCK_SIZE]] to i64
+; CHECK-NEXT:    br label %[[FOR_BODY:.*]]
+; CHECK:       [[FOR_COND_CLEANUP_LOOPEXIT:.*]]:
+; CHECK-NEXT:    br label %[[FOR_COND_CLEANUP]]
+; CHECK:       [[FOR_COND_CLEANUP]]:
+; CHECK-NEXT:    call void @x(ptr noundef nonnull [[BAR_ARR]])
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(ptr nonnull [[BAR_ARR]]) #[[ATTR6]]
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(ptr nonnull [[FOO_ARR]]) #[[ATTR6]]
+; CHECK-NEXT:    ret void
+; CHECK:       [[FOR_BODY]]:
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[IF_END4:.*]] ], [ 0, %[[FOR_BODY_PREHEADER]] ]
+; CHECK-NEXT:    [[EXITCOND1:%.*]] = icmp eq i64 [[INDVARS_IV]], 1024
+; CHECK-NEXT:    br i1 [[EXITCOND1]], label %[[IF_THEN:.*]], label %[[IF_END4]]
+; CHECK:       [[IF_THEN]]:
+; CHECK-NEXT:    call void @llvm.trap()
+; CHECK-NEXT:    unreachable
+; CHECK:       [[IF_END4]]:
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [1024 x i8], ptr [[FOO_ARR]], i64 0, i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[TMP3:%.*]] = load i8, ptr [[ARRAYIDX]], align 1, !tbaa [[TBAA5]]
+; CHECK-NEXT:    [[TMP4:%.*]] = xor i8 [[TMP3]], 54
+; CHECK-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds nuw [1025 x i8], ptr [[BAR_ARR]], i64 0, i64 [[INDVARS_IV]]
+; CHECK-NEXT:    store atomic i8 [[TMP4]], ptr [[ARRAYIDX7]] unordered, align 1, !tbaa [[TBAA5]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[TMP0]]
+; CHECK-NEXT:    br i1 [[EXITCOND]], label %[[FOR_BODY]], label %[[FOR_COND_CLEANUP_LOOPEXIT]], !llvm.loop [[LOOP8]]
+;
+entry:
+  %foo_arr = alloca [1024 x i8], align 16
+  %bar_arr = alloca [1025 x i8], align 16
+  call void @llvm.lifetime.start.p0(ptr nonnull %foo_arr) #4
+  call void @llvm.lifetime.start.p0(ptr nonnull %bar_arr) #4
+  call void @x(ptr noundef nonnull %foo_arr)
+  %cmp14.not = icmp eq i32 %block_size, 0
+  br i1 %cmp14.not, label %for.cond.cleanup, label %for.body.preheader
+
+for.body.preheader:                               ; preds = %entry
+  br label %for.body
+
+for.cond.cleanup.loopexit:                        ; preds = %if.end4
+  br label %for.cond.cleanup
+
+for.cond.cleanup:                                 ; preds = %for.cond.cleanup.loopexit, %entry
+  call void @x(ptr noundef nonnull %bar_arr)
+  call void @llvm.lifetime.end.p0(ptr nonnull %bar_arr) #4
+  call void @llvm.lifetime.end.p0(ptr nonnull %foo_arr) #4
+  ret void
+
+for.body:                                         ; preds = %for.body.preheader, %if.end4
+  %i.015 = phi i32 [ %inc, %if.end4 ], [ 0, %for.body.preheader ]
+  %cmp1 = icmp samesign ugt i32 %i.015, 1023
+  br i1 %cmp1, label %if.then, label %if.end4
+
+if.then:                                          ; preds = %for.body
+  call void @llvm.trap()
+  unreachable
+
+if.end4:                                          ; preds = %for.body
+  %idxprom = zext nneg i32 %i.015 to i64
+  %arrayidx = getelementptr inbounds nuw [1024 x i8], ptr %foo_arr, i64 0, i64 %idxprom
+  %0 = load i8, ptr %arrayidx, align 1, !tbaa !5
+  %1 = xor i8 %0, 54
+  %arrayidx7 = getelementptr inbounds nuw [1025 x i8], ptr %bar_arr, i64 0, i64 %idxprom
+  store atomic i8 %1, ptr %arrayidx7 unordered, align 1, !tbaa !5
+  %inc = add nuw nsw i32 %i.015, 1
+  %cmp = icmp ult i32 %inc, %block_size
+  br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit, !llvm.loop !8
+}
+
+; Function Attrs: mustprogress uwtable
+define dso_local void @foo_volatile(i32 noundef %block_size) local_unnamed_addr #0 {
+; CHECK-LABEL: define dso_local void @foo_volatile(
+; CHECK-SAME: i32 noundef [[BLOCK_SIZE:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[FOO_ARR:%.*]] = alloca [1024 x i8], align 16
+; CHECK-NEXT:    [[BAR_ARR:%.*]] = alloca [1025 x i8], align 16
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0(ptr nonnull [[FOO_ARR]]) #[[ATTR6]]
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0(ptr nonnull [[BAR_ARR]]) #[[ATTR6]]
+; CHECK-NEXT:    call void @x(ptr noundef nonnull [[FOO_ARR]])
+; CHECK-NEXT:    [[CMP14_NOT:%.*]] = icmp eq i32 [[BLOCK_SIZE]], 0
+; CHECK-NEXT:    br i1 [[CMP14_NOT]], label %[[FOR_COND_CLEANUP:.*]], label %[[FOR_BODY_PREHEADER:.*]]
+; CHECK:       [[FOR_BODY_PREHEADER]]:
+; CHECK-NEXT:    [[TMP0:%.*]] = zext i32 [[BLOCK_SIZE]] to i64
+; CHECK-NEXT:    br label %[[FOR_BODY:.*]]
+; CHECK:       [[FOR_COND_CLEANUP_LOOPEXIT:.*]]:
+; CHECK-NEXT:    br label %[[FOR_COND_CLEANUP]]
+; CHECK:       [[FOR_COND_CLEANUP]]:
+; CHECK-NEXT:    call void @x(ptr noundef nonnull [[BAR_ARR]])
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(ptr nonnull [[BAR_ARR]]) #[[ATTR6]]
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(ptr nonnull [[FOO_ARR]]) #[[ATTR6]]
+; CHECK-NEXT:    ret void
+; CHECK:       [[FOR_BODY]]:
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[IF_END4:.*]] ], [ 0, %[[FOR_BODY_PREHEADER]] ]
+; CHECK-NEXT:    [[EXITCOND1:%.*]] = icmp eq i64 [[INDVARS_IV]], 1024
+; CHECK-NEXT:    br i1 [[EXITCOND1]], label %[[IF_THEN:.*]], label %[[IF_END4]]
+; CHECK:       [[IF_THEN]]:
+; CHECK-NEXT:    call void @llvm.trap()
+; CHECK-NEXT:    unreachable
+; CHECK:       [[IF_END4]]:
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [1024 x i8], ptr [[FOO_ARR]], i64 0, i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[TMP3:%.*]] = load i8, ptr [[ARRAYIDX]], align 1, !tbaa [[TBAA5]]
+; CHECK-NEXT:    [[TMP4:%.*]] = xor i8 [[TMP3]], 54
+; CHECK-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds nuw [1025 x i8], ptr [[BAR_ARR]], i64 0, i64 [[INDVARS_IV]]
+; CHECK-NEXT:    store volatile i8 [[TMP4]], ptr [[ARRAYIDX7]], align 1, !tbaa [[TBAA5]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[TMP0]]
+; CHECK-NEXT:    br i1 [[EXITCOND]], label %[[FOR_BODY]], label %[[FOR_COND_CLEANUP_LOOPEXIT]], !llvm.loop [[LOOP8]]
+;
+entry:
+  %foo_arr = alloca [1024 x i8], align 16
+  %bar_arr = alloca [1025 x i8], align 16
+  call void @llvm.lifetime.start.p0(ptr nonnull %foo_arr) #4
+  call void @llvm.lifetime.start.p0(ptr nonnull %bar_arr) #4
+  call void @x(ptr noundef nonnull %foo_arr)
+  %cmp14.not = icmp eq i32 %block_size, 0
+  br i1 %cmp14.not, label %for.cond.cleanup, label %for.body.preheader
+
+for.body.preheader:                               ; preds = %entry
+  br label %for.body
+
+for.cond.cleanup.loopexit:                        ; preds = %if.end4
+  br label %for.cond.cleanup
+
+for.cond.cleanup:                                 ; preds = %for.cond.cleanup.loopexit, %entry
+  call void @x(ptr noundef nonnull %bar_arr)
+  call void @llvm.lifetime.end.p0(ptr nonnull %bar_arr) #4
+  call void @llvm.lifetime.end.p0(ptr nonnull %foo_arr) #4
+  ret void
+
+for.body:                                         ; preds = %for.body.preheader, %if.end4
+  %i.015 = phi i32 [ %inc, %if.end4 ], [ 0, %for.body.preheader ]
+  %cmp1 = icmp samesign ugt i32 %i.015, 1023
+  br i1 %cmp1, label %if.then, label %if.end4
+
+if.then:                                          ; preds = %for.body
+  call void @llvm.trap()
+  unreachable
+
+if.end4:                                          ; preds = %for.body
+  %idxprom = zext nneg i32 %i.015 to i64
+  %arrayidx = getelementptr inbounds nuw [1024 x i8], ptr %foo_arr, i64 0, i64 %idxprom
+  %0 = load i8, ptr %arrayidx, align 1, !tbaa !5
+  %1 = xor i8 %0, 54
+  %arrayidx7 = getelementptr inbounds nuw [1025 x i8], ptr %bar_arr, i64 0, i64 %idxprom
+  store volatile i8 %1, ptr %arrayidx7, align 1, !tbaa !5
+  %inc = add nuw nsw i32 %i.015, 1
+  %cmp = icmp ult i32 %inc, %block_size
+  br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit, !llvm.loop !8
+}
+
 define dso_local void @foo_ubsan(i32 noundef %block_size) local_unnamed_addr #0 {
 ; CHECK-LABEL: define dso_local void @foo_ubsan(
 ; CHECK-SAME: i32 noundef [[BLOCK_SIZE:%.*]]) local_unnamed_addr #[[ATTR0]] {

>From a4f857bb7b216d9c36b637f05538c421dbc25967 Mon Sep 17 00:00:00 2001
From: Florian Mayer <fmayer at google.com>
Date: Thu, 28 Aug 2025 13:29:08 -0700
Subject: [PATCH 5/8] TODO

Created using spr 1.3.4
---
 llvm/lib/Transforms/Scalar/IndVarSimplify.cpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
index 91e06f10fbe4c..e0b8a0c92d4a3 100644
--- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -1834,8 +1834,11 @@ bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
           // consider atomic operations as effects.
           // Because we need to preserve the relative order of volatile
           // accesses, turn off this optimization if we see any of them.
+          // TODO:
           // We could be smarter about volatile, and check whether the
           // reordering is valid.
+          // We also could be smarter about atomic, and check whether the
+          // local has leaked.
           if (SI->isAtomic() || SI->isVolatile() ||
               findAllocaForValue(SI->getPointerOperand(), false) == nullptr)
             return false;

>From c278bd46393f8cd43f62459e31d401d783a6b688 Mon Sep 17 00:00:00 2001
From: Florian Mayer <fmayer at google.com>
Date: Thu, 28 Aug 2025 13:49:06 -0700
Subject: [PATCH 6/8] style

Created using spr 1.3.4
---
 llvm/lib/Transforms/Scalar/IndVarSimplify.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
index e0b8a0c92d4a3..55b12dd2d34c3 100644
--- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -1871,7 +1871,7 @@ bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
       for (BasicBlock *Succ : BI->successors()) {
         if (isa<UnreachableInst>(Succ->getTerminator()))
           Unreachable = Succ;
-        if (L->contains(Succ))
+        else if (L->contains(Succ))
           InLoop = Succ;
       }
       // Exit BBs which have one branch back into the loop and another one to

>From 6d51827a842bda4d1d0d5d4da4961309b454a9c7 Mon Sep 17 00:00:00 2001
From: Florian Mayer <fmayer at google.com>
Date: Thu, 28 Aug 2025 14:32:22 -0700
Subject: [PATCH 7/8] change

Created using spr 1.3.4
---
 llvm/lib/Transforms/Scalar/IndVarSimplify.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
index 55b12dd2d34c3..dc14bfe857a76 100644
--- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -1879,7 +1879,7 @@ bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
       // be observed in the exit case (the trap). We could be smarter about
       // this, but for now let's pattern match common cases that directly trap.
       if (Unreachable == nullptr || InLoop == nullptr)
-        continue;
+        return Changed;
       if (llvm::any_of(*Unreachable, [](Instruction &I) {
             if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
               if (II->getIntrinsicID() != Intrinsic::trap &&
@@ -1890,7 +1890,7 @@ bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
             }
             return false;
           })) {
-        continue;
+        return Changed;
       }
     }
     Value *NewCond;

>From 3992eace1163620bacbe2a9b3fa5ce8f451351b8 Mon Sep 17 00:00:00 2001
From: Florian Mayer <fmayer at google.com>
Date: Thu, 28 Aug 2025 14:42:51 -0700
Subject: [PATCH 8/8] more test

Created using spr 1.3.4
---
 .../ScalarEvolution/unreachable-exit.ll       | 83 +++++++++++++++++++
 1 file changed, 83 insertions(+)

diff --git a/llvm/test/Analysis/ScalarEvolution/unreachable-exit.ll b/llvm/test/Analysis/ScalarEvolution/unreachable-exit.ll
index 4a035ce1a63a5..c44d5de1503eb 100644
--- a/llvm/test/Analysis/ScalarEvolution/unreachable-exit.ll
+++ b/llvm/test/Analysis/ScalarEvolution/unreachable-exit.ll
@@ -250,6 +250,89 @@ if.end4:                                          ; preds = %for.body
   br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit, !llvm.loop !8
 }
 
+; Function Attrs: mustprogress uwtable
+define dso_local void @foo_call(i32 noundef %block_size) local_unnamed_addr #0 {
+; CHECK-LABEL: define dso_local void @foo_call(
+; CHECK-SAME: i32 noundef [[BLOCK_SIZE:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[FOO_ARR:%.*]] = alloca [1024 x i8], align 16
+; CHECK-NEXT:    [[BAR_ARR:%.*]] = alloca [1025 x i8], align 16
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0(ptr nonnull [[FOO_ARR]]) #[[ATTR6]]
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0(ptr nonnull [[BAR_ARR]]) #[[ATTR6]]
+; CHECK-NEXT:    call void @x(ptr noundef nonnull [[FOO_ARR]])
+; CHECK-NEXT:    [[CMP14_NOT:%.*]] = icmp eq i32 [[BLOCK_SIZE]], 0
+; CHECK-NEXT:    br i1 [[CMP14_NOT]], label %[[FOR_COND_CLEANUP:.*]], label %[[FOR_BODY_PREHEADER:.*]]
+; CHECK:       [[FOR_BODY_PREHEADER]]:
+; CHECK-NEXT:    [[TMP0:%.*]] = zext i32 [[BLOCK_SIZE]] to i64
+; CHECK-NEXT:    br label %[[FOR_BODY:.*]]
+; CHECK:       [[FOR_COND_CLEANUP_LOOPEXIT:.*]]:
+; CHECK-NEXT:    br label %[[FOR_COND_CLEANUP]]
+; CHECK:       [[FOR_COND_CLEANUP]]:
+; CHECK-NEXT:    call void @x(ptr noundef nonnull [[BAR_ARR]])
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(ptr nonnull [[BAR_ARR]]) #[[ATTR6]]
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(ptr nonnull [[FOO_ARR]]) #[[ATTR6]]
+; CHECK-NEXT:    ret void
+; CHECK:       [[FOR_BODY]]:
+; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[IF_END4:.*]] ], [ 0, %[[FOR_BODY_PREHEADER]] ]
+; CHECK-NEXT:    [[EXITCOND1:%.*]] = icmp eq i64 [[INDVARS_IV]], 1024
+; CHECK-NEXT:    br i1 [[EXITCOND1]], label %[[IF_THEN:.*]], label %[[IF_END4]]
+; CHECK:       [[IF_THEN]]:
+; CHECK-NEXT:    call void @llvm.trap()
+; CHECK-NEXT:    unreachable
+; CHECK:       [[IF_END4]]:
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [1024 x i8], ptr [[FOO_ARR]], i64 0, i64 [[INDVARS_IV]]
+; CHECK-NEXT:    [[TMP3:%.*]] = load i8, ptr [[ARRAYIDX]], align 1, !tbaa [[TBAA5]]
+; CHECK-NEXT:    [[TMP4:%.*]] = xor i8 [[TMP3]], 54
+; CHECK-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds nuw [1025 x i8], ptr [[BAR_ARR]], i64 0, i64 [[INDVARS_IV]]
+; CHECK-NEXT:    call void @x(ptr null)
+; CHECK-NEXT:    store volatile i8 [[TMP4]], ptr [[ARRAYIDX7]], align 1, !tbaa [[TBAA5]]
+; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[TMP0]]
+; CHECK-NEXT:    br i1 [[EXITCOND]], label %[[FOR_BODY]], label %[[FOR_COND_CLEANUP_LOOPEXIT]], !llvm.loop [[LOOP8]]
+;
+entry:
+  %foo_arr = alloca [1024 x i8], align 16
+  %bar_arr = alloca [1025 x i8], align 16
+  call void @llvm.lifetime.start.p0(ptr nonnull %foo_arr) #4
+  call void @llvm.lifetime.start.p0(ptr nonnull %bar_arr) #4
+  call void @x(ptr noundef nonnull %foo_arr)
+  %cmp14.not = icmp eq i32 %block_size, 0
+  br i1 %cmp14.not, label %for.cond.cleanup, label %for.body.preheader
+
+for.body.preheader:                               ; preds = %entry
+  br label %for.body
+
+for.cond.cleanup.loopexit:                        ; preds = %if.end4
+  br label %for.cond.cleanup
+
+for.cond.cleanup:                                 ; preds = %for.cond.cleanup.loopexit, %entry
+  call void @x(ptr noundef nonnull %bar_arr)
+  call void @llvm.lifetime.end.p0(ptr nonnull %bar_arr) #4
+  call void @llvm.lifetime.end.p0(ptr nonnull %foo_arr) #4
+  ret void
+
+for.body:                                         ; preds = %for.body.preheader, %if.end4
+  %i.015 = phi i32 [ %inc, %if.end4 ], [ 0, %for.body.preheader ]
+  %cmp1 = icmp samesign ugt i32 %i.015, 1023
+  br i1 %cmp1, label %if.then, label %if.end4
+
+if.then:                                          ; preds = %for.body
+  call void @llvm.trap()
+  unreachable
+
+if.end4:                                          ; preds = %for.body
+  %idxprom = zext nneg i32 %i.015 to i64
+  %arrayidx = getelementptr inbounds nuw [1024 x i8], ptr %foo_arr, i64 0, i64 %idxprom
+  %0 = load i8, ptr %arrayidx, align 1, !tbaa !5
+  %1 = xor i8 %0, 54
+  %arrayidx7 = getelementptr inbounds nuw [1025 x i8], ptr %bar_arr, i64 0, i64 %idxprom
+  call void @x(ptr null)
+  store volatile i8 %1, ptr %arrayidx7, align 1, !tbaa !5
+  %inc = add nuw nsw i32 %i.015, 1
+  %cmp = icmp ult i32 %inc, %block_size
+  br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit, !llvm.loop !8
+}
+
 define dso_local void @foo_ubsan(i32 noundef %block_size) local_unnamed_addr #0 {
 ; CHECK-LABEL: define dso_local void @foo_ubsan(
 ; CHECK-SAME: i32 noundef [[BLOCK_SIZE:%.*]]) local_unnamed_addr #[[ATTR0]] {


