[llvm] [IndVarSimplify] Allow predicateLoopExit on some loops with local writes (PR #155901)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Aug 28 15:08:26 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-transforms
Author: Florian Mayer (fmayer)
<details>
<summary>Changes</summary>
This is important for optimizing patterns that frequently appear with
bounds checks:
```
for (int i = 0; i < N; ++i) {
  bar[i] = foo[i] + 123;
}
```
which is roughly turned into
```
for (int i = 0; i < N; ++i) {
  if (i >= size of foo)
    ubsan.trap();
  if (i >= size of bar)
    ubsan.trap();
  bar[i] = foo[i] + 123;
}
```
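With this change, `predicateLoopExits` can turn those per-iteration bound tests into loop-invariant conditions even though the loop stores to `bar`, provided `bar` is a function-local alloca: the trap then fires on the first iteration instead of the offending one, and since the program traps, nobody can observe that the stores to the local from the skipped iterations never happened. A hedged C sketch of the effect (`size_of_foo`, `size_of_bar`, and `ubsan_trap` are illustrative stand-ins, not the pass's literal output):
```c
/* Sketch only. After predication the bound test no longer depends on i,
 * so it is loop-invariant and later passes can hoist or unswitch it. */
for (int i = 0; i < N; ++i) {
  if (N > size_of_foo)   /* invariant: traps iff some iteration would */
    ubsan_trap();
  if (N > size_of_bar)
    ubsan_trap();
  bar[i] = foo[i] + 123;
}
```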
Alive2: https://alive2.llvm.org/ce/z/NREqBS
I ran `stage2-check-all` both normally and with
`-DBOOTSTRAP_CMAKE_C[XX]_FLAGS="-fsanitize=array-bounds -fsanitize-trap=all"`.
I also ran some Google-internal tests with `-fsanitize=array-bounds`. Everything passes.
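
In short, the patch relaxes the old "no side effects at all" bailout in `predicateLoopExits`: plain (non-atomic, non-volatile) stores whose pointer is rooted at an alloca are now tolerated, but only when every predicated exit branches straight to a block that just traps and ends in `unreachable`. The new behavior is guarded by the new `-indvars-predicate-loop-traps` flag (default on). A hedged C sketch of the distinction (`f` and `buf` are illustrative):
```c
/* Minimal sketch of what the relaxed check permits, under the
 * assumptions stated above. */
void f(int N) {
  char buf[1024]; /* function-local, i.e. an alloca */
  for (int i = 0; i < N; ++i) {
    if (i >= 1024)
      __builtin_trap(); /* exit block is trap + unreachable: predicatable */
    buf[i] = 1;         /* plain store to a local: now tolerated */
  }
}
/* Still rejected: volatile or atomic stores (the local may have escaped),
 * stores findAllocaForValue cannot trace to an alloca, other calls with
 * side effects, and exits that do not lead to a trap/ubsantrap block. */
```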
---
Patch is 32.28 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/155901.diff
2 Files Affected:
- (modified) llvm/lib/Transforms/Scalar/IndVarSimplify.cpp (+55-2)
- (added) llvm/test/Analysis/ScalarEvolution/unreachable-exit.ll (+536)
``````````diff
diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
index c32731185afd0..dc14bfe857a76 100644
--- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -53,6 +53,7 @@
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
@@ -117,6 +118,10 @@ static cl::opt<bool>
LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true),
cl::desc("Predicate conditions in read only loops"));
+static cl::opt<bool> LoopPredicationTraps(
+ "indvars-predicate-loop-traps", cl::Hidden, cl::init(true),
+ cl::desc("Predicate conditions that trap in loops with only local writes"));
+
static cl::opt<bool>
AllowIVWidening("indvars-widen-indvars", cl::Hidden, cl::init(true),
cl::desc("Allow widening of indvars to eliminate s/zext"));
@@ -1816,11 +1821,31 @@ bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
// suggestions on how to improve this? I can obviously bail out for outer
// loops, but that seems less than ideal. MemorySSA can find memory writes,
// is that enough for *all* side effects?
+ bool HasLocalSideEffects = false;
for (BasicBlock *BB : L->blocks())
for (auto &I : *BB)
// TODO:isGuaranteedToTransfer
- if (I.mayHaveSideEffects())
- return false;
+ if (I.mayHaveSideEffects()) {
+ if (!LoopPredicationTraps)
+ return false;
+ HasLocalSideEffects = true;
+ if (StoreInst *SI = dyn_cast<StoreInst>(&I)) {
+ // The local could have leaked out of the function, so we need to
+ // consider atomic operations as effects.
+ // Because we need to preserve the relative order of volatile
+ // accesses, turn off this optimization if we see any of them.
+ // TODO:
+ // We could be smarter about volatile, and check whether the
+ // reordering is valid.
+ // We also could be smarter about atomic, and check whether the
+ // local has leaked.
+ if (SI->isAtomic() || SI->isVolatile() ||
+ findAllocaForValue(SI->getPointerOperand(), false) == nullptr)
+ return false;
+ } else {
+ return false;
+ }
+ }
bool Changed = false;
// Finally, do the actual predication for all predicatable blocks. A couple
@@ -1840,6 +1865,34 @@ bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
+ if (HasLocalSideEffects) {
+ BasicBlock *Unreachable = nullptr;
+ BasicBlock *InLoop = nullptr;
+ for (BasicBlock *Succ : BI->successors()) {
+ if (isa<UnreachableInst>(Succ->getTerminator()))
+ Unreachable = Succ;
+ else if (L->contains(Succ))
+ InLoop = Succ;
+ }
+      // Exit BBs which have one branch back into the loop and another one to
+      // a trap can still be optimized, because local side effects cannot
+      // be observed in the exit case (the trap). We could be smarter about
+      // this, but for now let's pattern match common cases that directly trap.
+ if (Unreachable == nullptr || InLoop == nullptr)
+ return Changed;
+ if (llvm::any_of(*Unreachable, [](Instruction &I) {
+ if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
+ if (II->getIntrinsicID() != Intrinsic::trap &&
+ II->getIntrinsicID() != Intrinsic::ubsantrap)
+ return true;
+ } else if (!isa<UnreachableInst>(I)) {
+ return true;
+ }
+ return false;
+ })) {
+ return Changed;
+ }
+ }
Value *NewCond;
if (ExitCount == ExactBTC) {
NewCond = L->contains(BI->getSuccessor(0)) ?
diff --git a/llvm/test/Analysis/ScalarEvolution/unreachable-exit.ll b/llvm/test/Analysis/ScalarEvolution/unreachable-exit.ll
new file mode 100644
index 0000000000000..c44d5de1503eb
--- /dev/null
+++ b/llvm/test/Analysis/ScalarEvolution/unreachable-exit.ll
@@ -0,0 +1,536 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -passes=indvars < %s | FileCheck %s
+
+source_filename = "/usr/local/google/home/fmayer/loop/src3.cc"
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: mustprogress uwtable
+define dso_local void @foo(i32 noundef %block_size) local_unnamed_addr #0 {
+; CHECK-LABEL: define dso_local void @foo(
+; CHECK-SAME: i32 noundef [[BLOCK_SIZE:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[FOO_ARR:%.*]] = alloca [1024 x i8], align 16
+; CHECK-NEXT: [[BAR_ARR:%.*]] = alloca [1025 x i8], align 16
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[FOO_ARR]]) #[[ATTR6:[0-9]+]]
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[BAR_ARR]]) #[[ATTR6]]
+; CHECK-NEXT: call void @x(ptr noundef nonnull [[FOO_ARR]])
+; CHECK-NEXT: [[CMP14_NOT:%.*]] = icmp eq i32 [[BLOCK_SIZE]], 0
+; CHECK-NEXT: br i1 [[CMP14_NOT]], label %[[FOR_COND_CLEANUP:.*]], label %[[FOR_BODY_PREHEADER:.*]]
+; CHECK: [[FOR_BODY_PREHEADER]]:
+; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[BLOCK_SIZE]] to i64
+; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[BLOCK_SIZE]], -1
+; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP1]], i32 1024)
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 1024, [[UMIN]]
+; CHECK-NEXT: br label %[[FOR_BODY:.*]]
+; CHECK: [[FOR_COND_CLEANUP_LOOPEXIT:.*]]:
+; CHECK-NEXT: br label %[[FOR_COND_CLEANUP]]
+; CHECK: [[FOR_COND_CLEANUP]]:
+; CHECK-NEXT: call void @x(ptr noundef nonnull [[BAR_ARR]])
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr nonnull [[BAR_ARR]]) #[[ATTR6]]
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr nonnull [[FOO_ARR]]) #[[ATTR6]]
+; CHECK-NEXT: ret void
+; CHECK: [[FOR_BODY]]:
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[IF_END4:.*]] ], [ 0, %[[FOR_BODY_PREHEADER]] ]
+; CHECK-NEXT: br i1 [[TMP2]], label %[[IF_THEN:.*]], label %[[IF_END4]]
+; CHECK: [[IF_THEN]]:
+; CHECK-NEXT: call void @llvm.trap()
+; CHECK-NEXT: unreachable
+; CHECK: [[IF_END4]]:
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [1024 x i8], ptr [[FOO_ARR]], i64 0, i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr [[ARRAYIDX]], align 1, !tbaa [[TBAA5:![0-9]+]]
+; CHECK-NEXT: [[TMP4:%.*]] = xor i8 [[TMP3]], 54
+; CHECK-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds nuw [1025 x i8], ptr [[BAR_ARR]], i64 0, i64 [[INDVARS_IV]]
+; CHECK-NEXT: store i8 [[TMP4]], ptr [[ARRAYIDX7]], align 1, !tbaa [[TBAA5]]
+; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[TMP0]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_BODY]], label %[[FOR_COND_CLEANUP_LOOPEXIT]], !llvm.loop [[LOOP8:![0-9]+]]
+;
+entry:
+ %foo_arr = alloca [1024 x i8], align 16
+ %bar_arr = alloca [1025 x i8], align 16
+ call void @llvm.lifetime.start.p0(ptr nonnull %foo_arr) #4
+ call void @llvm.lifetime.start.p0(ptr nonnull %bar_arr) #4
+ call void @x(ptr noundef nonnull %foo_arr)
+ %cmp14.not = icmp eq i32 %block_size, 0
+ br i1 %cmp14.not, label %for.cond.cleanup, label %for.body.preheader
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.cond.cleanup.loopexit: ; preds = %if.end4
+ br label %for.cond.cleanup
+
+for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry
+ call void @x(ptr noundef nonnull %bar_arr)
+ call void @llvm.lifetime.end.p0(ptr nonnull %bar_arr) #4
+ call void @llvm.lifetime.end.p0(ptr nonnull %foo_arr) #4
+ ret void
+
+for.body: ; preds = %for.body.preheader, %if.end4
+ %i.015 = phi i32 [ %inc, %if.end4 ], [ 0, %for.body.preheader ]
+ %cmp1 = icmp samesign ugt i32 %i.015, 1023
+ br i1 %cmp1, label %if.then, label %if.end4
+
+if.then: ; preds = %for.body
+ call void @llvm.trap()
+ unreachable
+
+if.end4: ; preds = %for.body
+ %idxprom = zext nneg i32 %i.015 to i64
+ %arrayidx = getelementptr inbounds nuw [1024 x i8], ptr %foo_arr, i64 0, i64 %idxprom
+ %0 = load i8, ptr %arrayidx, align 1, !tbaa !5
+ %1 = xor i8 %0, 54
+ %arrayidx7 = getelementptr inbounds nuw [1025 x i8], ptr %bar_arr, i64 0, i64 %idxprom
+ store i8 %1, ptr %arrayidx7, align 1, !tbaa !5
+ %inc = add nuw nsw i32 %i.015, 1
+ %cmp = icmp ult i32 %inc, %block_size
+ br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit, !llvm.loop !8
+}
+
+; Function Attrs: mustprogress uwtable
+define dso_local void @foo_atomic(i32 noundef %block_size) local_unnamed_addr #0 {
+; CHECK-LABEL: define dso_local void @foo_atomic(
+; CHECK-SAME: i32 noundef [[BLOCK_SIZE:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[FOO_ARR:%.*]] = alloca [1024 x i8], align 16
+; CHECK-NEXT: [[BAR_ARR:%.*]] = alloca [1025 x i8], align 16
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[FOO_ARR]]) #[[ATTR6]]
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[BAR_ARR]]) #[[ATTR6]]
+; CHECK-NEXT: call void @x(ptr noundef nonnull [[FOO_ARR]])
+; CHECK-NEXT: [[CMP14_NOT:%.*]] = icmp eq i32 [[BLOCK_SIZE]], 0
+; CHECK-NEXT: br i1 [[CMP14_NOT]], label %[[FOR_COND_CLEANUP:.*]], label %[[FOR_BODY_PREHEADER:.*]]
+; CHECK: [[FOR_BODY_PREHEADER]]:
+; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[BLOCK_SIZE]] to i64
+; CHECK-NEXT: br label %[[FOR_BODY:.*]]
+; CHECK: [[FOR_COND_CLEANUP_LOOPEXIT:.*]]:
+; CHECK-NEXT: br label %[[FOR_COND_CLEANUP]]
+; CHECK: [[FOR_COND_CLEANUP]]:
+; CHECK-NEXT: call void @x(ptr noundef nonnull [[BAR_ARR]])
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr nonnull [[BAR_ARR]]) #[[ATTR6]]
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr nonnull [[FOO_ARR]]) #[[ATTR6]]
+; CHECK-NEXT: ret void
+; CHECK: [[FOR_BODY]]:
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[IF_END4:.*]] ], [ 0, %[[FOR_BODY_PREHEADER]] ]
+; CHECK-NEXT: [[EXITCOND1:%.*]] = icmp eq i64 [[INDVARS_IV]], 1024
+; CHECK-NEXT: br i1 [[EXITCOND1]], label %[[IF_THEN:.*]], label %[[IF_END4]]
+; CHECK: [[IF_THEN]]:
+; CHECK-NEXT: call void @llvm.trap()
+; CHECK-NEXT: unreachable
+; CHECK: [[IF_END4]]:
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [1024 x i8], ptr [[FOO_ARR]], i64 0, i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr [[ARRAYIDX]], align 1, !tbaa [[TBAA5]]
+; CHECK-NEXT: [[TMP4:%.*]] = xor i8 [[TMP3]], 54
+; CHECK-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds nuw [1025 x i8], ptr [[BAR_ARR]], i64 0, i64 [[INDVARS_IV]]
+; CHECK-NEXT: store atomic i8 [[TMP4]], ptr [[ARRAYIDX7]] unordered, align 1, !tbaa [[TBAA5]]
+; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[TMP0]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_BODY]], label %[[FOR_COND_CLEANUP_LOOPEXIT]], !llvm.loop [[LOOP8]]
+;
+entry:
+ %foo_arr = alloca [1024 x i8], align 16
+ %bar_arr = alloca [1025 x i8], align 16
+ call void @llvm.lifetime.start.p0(ptr nonnull %foo_arr) #4
+ call void @llvm.lifetime.start.p0(ptr nonnull %bar_arr) #4
+ call void @x(ptr noundef nonnull %foo_arr)
+ %cmp14.not = icmp eq i32 %block_size, 0
+ br i1 %cmp14.not, label %for.cond.cleanup, label %for.body.preheader
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.cond.cleanup.loopexit: ; preds = %if.end4
+ br label %for.cond.cleanup
+
+for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry
+ call void @x(ptr noundef nonnull %bar_arr)
+ call void @llvm.lifetime.end.p0(ptr nonnull %bar_arr) #4
+ call void @llvm.lifetime.end.p0(ptr nonnull %foo_arr) #4
+ ret void
+
+for.body: ; preds = %for.body.preheader, %if.end4
+ %i.015 = phi i32 [ %inc, %if.end4 ], [ 0, %for.body.preheader ]
+ %cmp1 = icmp samesign ugt i32 %i.015, 1023
+ br i1 %cmp1, label %if.then, label %if.end4
+
+if.then: ; preds = %for.body
+ call void @llvm.trap()
+ unreachable
+
+if.end4: ; preds = %for.body
+ %idxprom = zext nneg i32 %i.015 to i64
+ %arrayidx = getelementptr inbounds nuw [1024 x i8], ptr %foo_arr, i64 0, i64 %idxprom
+ %0 = load i8, ptr %arrayidx, align 1, !tbaa !5
+ %1 = xor i8 %0, 54
+ %arrayidx7 = getelementptr inbounds nuw [1025 x i8], ptr %bar_arr, i64 0, i64 %idxprom
+ store atomic i8 %1, ptr %arrayidx7 unordered, align 1, !tbaa !5
+ %inc = add nuw nsw i32 %i.015, 1
+ %cmp = icmp ult i32 %inc, %block_size
+ br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit, !llvm.loop !8
+}
+
+; Function Attrs: mustprogress uwtable
+define dso_local void @foo_volatile(i32 noundef %block_size) local_unnamed_addr #0 {
+; CHECK-LABEL: define dso_local void @foo_volatile(
+; CHECK-SAME: i32 noundef [[BLOCK_SIZE:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[FOO_ARR:%.*]] = alloca [1024 x i8], align 16
+; CHECK-NEXT: [[BAR_ARR:%.*]] = alloca [1025 x i8], align 16
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[FOO_ARR]]) #[[ATTR6]]
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[BAR_ARR]]) #[[ATTR6]]
+; CHECK-NEXT: call void @x(ptr noundef nonnull [[FOO_ARR]])
+; CHECK-NEXT: [[CMP14_NOT:%.*]] = icmp eq i32 [[BLOCK_SIZE]], 0
+; CHECK-NEXT: br i1 [[CMP14_NOT]], label %[[FOR_COND_CLEANUP:.*]], label %[[FOR_BODY_PREHEADER:.*]]
+; CHECK: [[FOR_BODY_PREHEADER]]:
+; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[BLOCK_SIZE]] to i64
+; CHECK-NEXT: br label %[[FOR_BODY:.*]]
+; CHECK: [[FOR_COND_CLEANUP_LOOPEXIT:.*]]:
+; CHECK-NEXT: br label %[[FOR_COND_CLEANUP]]
+; CHECK: [[FOR_COND_CLEANUP]]:
+; CHECK-NEXT: call void @x(ptr noundef nonnull [[BAR_ARR]])
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr nonnull [[BAR_ARR]]) #[[ATTR6]]
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr nonnull [[FOO_ARR]]) #[[ATTR6]]
+; CHECK-NEXT: ret void
+; CHECK: [[FOR_BODY]]:
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[IF_END4:.*]] ], [ 0, %[[FOR_BODY_PREHEADER]] ]
+; CHECK-NEXT: [[EXITCOND1:%.*]] = icmp eq i64 [[INDVARS_IV]], 1024
+; CHECK-NEXT: br i1 [[EXITCOND1]], label %[[IF_THEN:.*]], label %[[IF_END4]]
+; CHECK: [[IF_THEN]]:
+; CHECK-NEXT: call void @llvm.trap()
+; CHECK-NEXT: unreachable
+; CHECK: [[IF_END4]]:
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [1024 x i8], ptr [[FOO_ARR]], i64 0, i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr [[ARRAYIDX]], align 1, !tbaa [[TBAA5]]
+; CHECK-NEXT: [[TMP4:%.*]] = xor i8 [[TMP3]], 54
+; CHECK-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds nuw [1025 x i8], ptr [[BAR_ARR]], i64 0, i64 [[INDVARS_IV]]
+; CHECK-NEXT: store volatile i8 [[TMP4]], ptr [[ARRAYIDX7]], align 1, !tbaa [[TBAA5]]
+; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[TMP0]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_BODY]], label %[[FOR_COND_CLEANUP_LOOPEXIT]], !llvm.loop [[LOOP8]]
+;
+entry:
+ %foo_arr = alloca [1024 x i8], align 16
+ %bar_arr = alloca [1025 x i8], align 16
+ call void @llvm.lifetime.start.p0(ptr nonnull %foo_arr) #4
+ call void @llvm.lifetime.start.p0(ptr nonnull %bar_arr) #4
+ call void @x(ptr noundef nonnull %foo_arr)
+ %cmp14.not = icmp eq i32 %block_size, 0
+ br i1 %cmp14.not, label %for.cond.cleanup, label %for.body.preheader
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.cond.cleanup.loopexit: ; preds = %if.end4
+ br label %for.cond.cleanup
+
+for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry
+ call void @x(ptr noundef nonnull %bar_arr)
+ call void @llvm.lifetime.end.p0(ptr nonnull %bar_arr) #4
+ call void @llvm.lifetime.end.p0(ptr nonnull %foo_arr) #4
+ ret void
+
+for.body: ; preds = %for.body.preheader, %if.end4
+ %i.015 = phi i32 [ %inc, %if.end4 ], [ 0, %for.body.preheader ]
+ %cmp1 = icmp samesign ugt i32 %i.015, 1023
+ br i1 %cmp1, label %if.then, label %if.end4
+
+if.then: ; preds = %for.body
+ call void @llvm.trap()
+ unreachable
+
+if.end4: ; preds = %for.body
+ %idxprom = zext nneg i32 %i.015 to i64
+ %arrayidx = getelementptr inbounds nuw [1024 x i8], ptr %foo_arr, i64 0, i64 %idxprom
+ %0 = load i8, ptr %arrayidx, align 1, !tbaa !5
+ %1 = xor i8 %0, 54
+ %arrayidx7 = getelementptr inbounds nuw [1025 x i8], ptr %bar_arr, i64 0, i64 %idxprom
+ store volatile i8 %1, ptr %arrayidx7, align 1, !tbaa !5
+ %inc = add nuw nsw i32 %i.015, 1
+ %cmp = icmp ult i32 %inc, %block_size
+ br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit, !llvm.loop !8
+}
+
+; Function Attrs: mustprogress uwtable
+define dso_local void @foo_call(i32 noundef %block_size) local_unnamed_addr #0 {
+; CHECK-LABEL: define dso_local void @foo_call(
+; CHECK-SAME: i32 noundef [[BLOCK_SIZE:%.*]]) local_unnamed_addr #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[FOO_ARR:%.*]] = alloca [1024 x i8], align 16
+; CHECK-NEXT: [[BAR_ARR:%.*]] = alloca [1025 x i8], align 16
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[FOO_ARR]]) #[[ATTR6]]
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr nonnull [[BAR_ARR]]) #[[ATTR6]]
+; CHECK-NEXT: call void @x(ptr noundef nonnull [[FOO_ARR]])
+; CHECK-NEXT: [[CMP14_NOT:%.*]] = icmp eq i32 [[BLOCK_SIZE]], 0
+; CHECK-NEXT: br i1 [[CMP14_NOT]], label %[[FOR_COND_CLEANUP:.*]], label %[[FOR_BODY_PREHEADER:.*]]
+; CHECK: [[FOR_BODY_PREHEADER]]:
+; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[BLOCK_SIZE]] to i64
+; CHECK-NEXT: br label %[[FOR_BODY:.*]]
+; CHECK: [[FOR_COND_CLEANUP_LOOPEXIT:.*]]:
+; CHECK-NEXT: br label %[[FOR_COND_CLEANUP]]
+; CHECK: [[FOR_COND_CLEANUP]]:
+; CHECK-NEXT: call void @x(ptr noundef nonnull [[BAR_ARR]])
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr nonnull [[BAR_ARR]]) #[[ATTR6]]
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr nonnull [[FOO_ARR]]) #[[ATTR6]]
+; CHECK-NEXT: ret void
+; CHECK: [[FOR_BODY]]:
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[IF_END4:.*]] ], [ 0, %[[FOR_BODY_PREHEADER]] ]
+; CHECK-NEXT: [[EXITCOND1:%.*]] = icmp eq i64 [[INDVARS_IV]], 1024
+; CHECK-NEXT: br i1 [[EXITCOND1]], label %[[IF_THEN:.*]], label %[[IF_END4]]
+; CHECK: [[IF_THEN]]:
+; CHECK-NEXT: call void @llvm.trap()
+; CHECK-NEXT: unreachable
+; CHECK: [[IF_END4]]:
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [1024 x i8], ptr [[FOO_ARR]], i64 0, i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr [[ARRAYIDX]], align 1, !tbaa [[TBAA5]]
+; CHECK-NEXT: [[TMP4:%.*]] = xor i8 [[TMP3]], 54
+; CHECK-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds nuw [1025 x i8], ptr [[BAR_ARR]], i64 0, i64 [[INDVARS_IV]]
+; CHECK-NEXT: call void @x(ptr null)
+; CHECK-NEXT: store volatile i8 [[TMP4]], ptr [[ARRAYIDX7]], align 1, !tbaa [[TBAA5]]
+; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[TMP0]]
+; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_BODY]], label %[[FOR_COND_CLE...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/155901