[llvm] 39b0cbe - [IndVarSimplify] Allow predicateLoopExit on some loops with thread-local writes (#155901)
Author: Florian Mayer
Date: 2025-10-16T09:18:00-07:00
New Revision: 39b0cbe69ca8704dbc5846a91d136f0ed6101782
URL: https://github.com/llvm/llvm-project/commit/39b0cbe69ca8704dbc5846a91d136f0ed6101782
DIFF: https://github.com/llvm/llvm-project/commit/39b0cbe69ca8704dbc5846a91d136f0ed6101782.diff
LOG: [IndVarSimplify] Allow predicateLoopExit on some loops with thread-local writes (#155901)
This is important for optimizing patterns that frequently appear with
bounds checks:
```
for (int i = 0; i < N; ++i) {
bar[i] = foo[i] + 123;
}
```
which is roughly turned into
```
for (int i = 0; i < N; ++i) {
if (i >= size of foo)
ubsan.trap();
if (i >= size of bar)
ubsan.trap();
bar[i] = foo[i] + 123;
}
```
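After predication, the per-iteration checks are replaced by a single
loop-invariant condition computed before the loop. Roughly (a sketch only;
the exact IR is in the added unreachable-exit.ll test):
```
bool will_trap = N > size of foo || N > size of bar; // computed once
for (int i = 0; i < N; ++i) {
  if (will_trap)
    ubsan.trap();
  bar[i] = foo[i] + 123;
}
```
Taking the trap on the first iteration skips the stores that earlier
iterations would have performed. That is only legal because those stores are
simple and thread-local and the trap block cannot observe them, which is
exactly what the new checks verify.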
Motivating example:
https://github.com/google/boringssl/blob/main/crypto/fipsmodule/hmac/hmac.cc.inc#L138
I hand-verified the assembly and confirmed that this optimization removes
the bounds check from the loop, which in turn allowed the loop to be
vectorized.
Alive2: https://alive2.llvm.org/ce/z/3qMdLF
I ran `stage2-check-all` for both a normal build and one with
`-DBOOTSTRAP_CMAKE_C[XX]_FLAGS="-fsanitize=array-bounds
-fsanitize-trap=all"`.
I also ran some Google-internal tests with `-fsanitize=array-bounds`.
Everything passes.
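The new behavior is enabled by default but guarded by a hidden flag, so it
can be switched off when triaging (a hypothetical standalone invocation,
assuming a plain `opt` run as in the new test):
```
opt -S -passes=indvars -indvars-predicate-loop-traps=false input.ll
```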
Added:
llvm/test/Transforms/IndVarSimplify/unreachable-exit.ll
Modified:
llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
llvm/test/Transforms/IndVarSimplify/X86/overflow-intrinsics.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
index c32731185afd0..7ebcc219efc15 100644
--- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -53,6 +53,7 @@
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/PatternMatch.h"
@@ -117,6 +118,10 @@ static cl::opt<bool>
LoopPredication("indvars-predicate-loops", cl::Hidden, cl::init(true),
cl::desc("Predicate conditions in read only loops"));
+static cl::opt<bool> LoopPredicationTraps(
+ "indvars-predicate-loop-traps", cl::Hidden, cl::init(true),
+ cl::desc("Predicate conditions that trap in loops with only local writes"));
+
static cl::opt<bool>
AllowIVWidening("indvars-widen-indvars", cl::Hidden, cl::init(true),
cl::desc("Allow widening of indvars to eliminate s/zext"));
@@ -1704,6 +1709,24 @@ bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
return Changed;
}
+static bool crashingBBWithoutEffect(const BasicBlock &BB) {
+ return llvm::all_of(BB, [](const Instruction &I) {
+ // TODO: for now this is overly restrictive, to make sure nothing in this
+ // BB can depend on the loop body.
+ // It's not enough to check for !I.mayHaveSideEffects(), because e.g. a
+ // load does not have a side effect, but we could have
+ // %a = load ptr, ptr %ptr
+ // %b = load i32, ptr %a
+ // Now if the loop stored a non-nullptr to %a, we could cause a nullptr
+ // dereference by skipping over loop iterations.
+ if (const auto *CB = dyn_cast<CallBase>(&I)) {
+ if (CB->onlyAccessesInaccessibleMemory())
+ return true;
+ }
+ return isa<UnreachableInst>(I);
+ });
+}
+
bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
SmallVector<BasicBlock*, 16> ExitingBlocks;
L->getExitingBlocks(ExitingBlocks);
@@ -1816,11 +1839,25 @@ bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
// suggestions on how to improve this? I can obviously bail out for outer
// loops, but that seems less than ideal. MemorySSA can find memory writes,
// is that enough for *all* side effects?
+ bool HasThreadLocalSideEffects = false;
for (BasicBlock *BB : L->blocks())
for (auto &I : *BB)
// TODO:isGuaranteedToTransfer
- if (I.mayHaveSideEffects())
- return false;
+ if (I.mayHaveSideEffects()) {
+ if (!LoopPredicationTraps)
+ return false;
+ HasThreadLocalSideEffects = true;
+ if (StoreInst *SI = dyn_cast<StoreInst>(&I)) {
+ // Simple stores cannot be observed by other threads.
+ // If HasThreadLocalSideEffects is set, we check
+ // crashingBBWithoutEffect to make sure that the crashing BB cannot
+ // observe them either.
+ if (!SI->isSimple())
+ return false;
+ } else {
+ return false;
+ }
+ }
bool Changed = false;
// Finally, do the actual predication for all predicatable blocks. A couple
@@ -1840,6 +1877,19 @@ bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
const SCEV *ExitCount = SE->getExitCount(L, ExitingBB);
auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
+ if (HasThreadLocalSideEffects) {
+ const BasicBlock *Unreachable = nullptr;
+ for (const BasicBlock *Succ : BI->successors()) {
+ if (isa<UnreachableInst>(Succ->getTerminator()))
+ Unreachable = Succ;
+ }
+      // Exit BBs which have one branch back into the loop and another one to
+      // a trap can still be optimized, because local side effects cannot
+      // be observed in the exit case (the trap). We could be smarter about
+      // this, but for now let's pattern match common cases that directly trap.
+ if (Unreachable == nullptr || !crashingBBWithoutEffect(*Unreachable))
+ return Changed;
+ }
Value *NewCond;
if (ExitCount == ExactBTC) {
NewCond = L->contains(BI->getSuccessor(0)) ?
diff --git a/llvm/test/Transforms/IndVarSimplify/X86/overflow-intrinsics.ll b/llvm/test/Transforms/IndVarSimplify/X86/overflow-intrinsics.ll
index cb4e07ef3e26b..9b9bc68ba7ad8 100644
--- a/llvm/test/Transforms/IndVarSimplify/X86/overflow-intrinsics.ll
+++ b/llvm/test/Transforms/IndVarSimplify/X86/overflow-intrinsics.ll
@@ -60,8 +60,7 @@ define void @f_sadd_overflow(ptr %a) {
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[CONT:.*]] ], [ 2147483645, %[[ENTRY]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDVARS_IV]]
; CHECK-NEXT: store i8 0, ptr [[ARRAYIDX]], align 1
-; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV]], 2147483647
-; CHECK-NEXT: br i1 [[EXITCOND]], label %[[TRAP:.*]], label %[[CONT]], !nosanitize [[META0]]
+; CHECK-NEXT: br i1 true, label %[[TRAP:.*]], label %[[CONT]], !nosanitize [[META0]]
; CHECK: [[TRAP]]:
; CHECK-NEXT: tail call void @llvm.trap(), !nosanitize [[META0]]
; CHECK-NEXT: unreachable, !nosanitize [[META0]]
@@ -150,8 +149,7 @@ define void @f_uadd_overflow(ptr %a) {
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[CONT:.*]] ], [ -6, %[[ENTRY]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDVARS_IV]]
; CHECK-NEXT: store i8 0, ptr [[ARRAYIDX]], align 1
-; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV]], -1
-; CHECK-NEXT: br i1 [[EXITCOND]], label %[[TRAP:.*]], label %[[CONT]], !nosanitize [[META0]]
+; CHECK-NEXT: br i1 true, label %[[TRAP:.*]], label %[[CONT]], !nosanitize [[META0]]
; CHECK: [[TRAP]]:
; CHECK-NEXT: tail call void @llvm.trap(), !nosanitize [[META0]]
; CHECK-NEXT: unreachable, !nosanitize [[META0]]
@@ -243,10 +241,7 @@ define void @f_ssub_overflow(ptr nocapture %a) {
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[CONT:.*]] ], [ -2147483642, %[[ENTRY]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDVARS_IV]]
; CHECK-NEXT: store i8 0, ptr [[ARRAYIDX]], align 1
-; CHECK-NEXT: [[TMP0:%.*]] = trunc nsw i64 [[INDVARS_IV]] to i32
-; CHECK-NEXT: [[TMP1:%.*]] = tail call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 [[TMP0]], i32 1)
-; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { i32, i1 } [[TMP1]], 1
-; CHECK-NEXT: br i1 [[TMP2]], label %[[TRAP:.*]], label %[[CONT]], !nosanitize [[META0]]
+; CHECK-NEXT: br i1 true, label %[[TRAP:.*]], label %[[CONT]], !nosanitize [[META0]]
; CHECK: [[TRAP]]:
; CHECK-NEXT: tail call void @llvm.trap(), !nosanitize [[META0]]
; CHECK-NEXT: unreachable, !nosanitize [[META0]]
@@ -339,10 +334,7 @@ define void @f_usub_overflow(ptr nocapture %a) {
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[CONT:.*]] ], [ 15, %[[ENTRY]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDVARS_IV]]
; CHECK-NEXT: store i8 0, ptr [[ARRAYIDX]], align 1
-; CHECK-NEXT: [[TMP0:%.*]] = trunc nuw nsw i64 [[INDVARS_IV]] to i32
-; CHECK-NEXT: [[TMP1:%.*]] = tail call { i32, i1 } @llvm.usub.with.overflow.i32(i32 [[TMP0]], i32 1)
-; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { i32, i1 } [[TMP1]], 1
-; CHECK-NEXT: br i1 [[TMP2]], label %[[TRAP:.*]], label %[[CONT]], !nosanitize [[META0]]
+; CHECK-NEXT: br i1 true, label %[[TRAP:.*]], label %[[CONT]], !nosanitize [[META0]]
; CHECK: [[TRAP]]:
; CHECK-NEXT: tail call void @llvm.trap(), !nosanitize [[META0]]
; CHECK-NEXT: unreachable, !nosanitize [[META0]]
diff --git a/llvm/test/Transforms/IndVarSimplify/unreachable-exit.ll b/llvm/test/Transforms/IndVarSimplify/unreachable-exit.ll
new file mode 100644
index 0000000000000..b9c92288b18c1
--- /dev/null
+++ b/llvm/test/Transforms/IndVarSimplify/unreachable-exit.ll
@@ -0,0 +1,738 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -passes=indvars < %s | FileCheck %s
+
+define void @optimize_trap(i32 %block_size) {
+; CHECK-LABEL: define void @optimize_trap(
+; CHECK-SAME: i32 [[BLOCK_SIZE:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[FOO_ARR:%.*]] = alloca [2 x i8], align 16
+; CHECK-NEXT: [[BAR_ARR:%.*]] = alloca [2 x i8], align 16
+; CHECK-NEXT: call void @x(ptr nonnull [[FOO_ARR]])
+; CHECK-NEXT: [[CMP14_NOT:%.*]] = icmp eq i32 [[BLOCK_SIZE]], 0
+; CHECK-NEXT: br i1 [[CMP14_NOT]], label %[[FOR_COND_CLEANUP:.*]], label %[[FOR_BODY_PREHEADER:.*]]
+; CHECK: [[FOR_BODY_PREHEADER]]:
+; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[BLOCK_SIZE]], -1
+; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP1]], i32 3)
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 3, [[UMIN]]
+; CHECK-NEXT: br label %[[FOR_BODY:.*]]
+; CHECK: [[FOR_COND_CLEANUP_LOOPEXIT:.*]]:
+; CHECK-NEXT: br label %[[FOR_COND_CLEANUP]]
+; CHECK: [[FOR_COND_CLEANUP]]:
+; CHECK-NEXT: call void @x(ptr nonnull [[BAR_ARR]])
+; CHECK-NEXT: ret void
+; CHECK: [[FOR_BODY]]:
+; CHECK-NEXT: [[I_015:%.*]] = phi i32 [ [[INC:%.*]], %[[IF_END4:.*]] ], [ 0, %[[FOR_BODY_PREHEADER]] ]
+; CHECK-NEXT: br i1 [[TMP2]], label %[[IF_THEN:.*]], label %[[IF_END4]]
+; CHECK: [[IF_THEN]]:
+; CHECK-NEXT: call void @llvm.trap()
+; CHECK-NEXT: unreachable
+; CHECK: [[IF_END4]]:
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [1024 x i8], ptr [[FOO_ARR]], i64 0, i32 [[I_015]]
+; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[TMP4:%.*]] = xor i8 [[TMP3]], 54
+; CHECK-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds nuw [1025 x i8], ptr [[BAR_ARR]], i64 0, i32 [[I_015]]
+; CHECK-NEXT: store i8 [[TMP4]], ptr [[ARRAYIDX7]], align 1
+; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_015]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[INC]], [[BLOCK_SIZE]]
+; CHECK-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_COND_CLEANUP_LOOPEXIT]]
+;
+entry:
+ %foo_arr = alloca [2 x i8], align 16
+ %bar_arr = alloca [2 x i8], align 16
+ call void @x(ptr nonnull %foo_arr)
+ %cmp14.not = icmp eq i32 %block_size, 0
+ br i1 %cmp14.not, label %for.cond.cleanup, label %for.body.preheader
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.cond.cleanup.loopexit: ; preds = %if.end4
+ br label %for.cond.cleanup
+
+for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry
+ call void @x(ptr nonnull %bar_arr)
+ ret void
+
+for.body: ; preds = %for.body.preheader, %if.end4
+ %i.015 = phi i32 [ %inc, %if.end4 ], [ 0, %for.body.preheader ]
+ %cmp1 = icmp samesign ugt i32 %i.015, 2
+ br i1 %cmp1, label %if.then, label %if.end4
+
+if.then: ; preds = %for.body
+ call void @llvm.trap()
+ unreachable
+
+if.end4: ; preds = %for.body
+ %arrayidx = getelementptr inbounds nuw [1024 x i8], ptr %foo_arr, i64 0, i32 %i.015
+ %0 = load i8, ptr %arrayidx, align 1
+ %1 = xor i8 %0, 54
+ %arrayidx7 = getelementptr inbounds nuw [1025 x i8], ptr %bar_arr, i64 0, i32 %i.015
+ store i8 %1, ptr %arrayidx7, align 1
+ %inc = add nuw nsw i32 %i.015, 1
+ %cmp = icmp ult i32 %inc, %block_size
+ br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit
+}
+
+define void @no_optimize_atomic(i32 %block_size) {
+; CHECK-LABEL: define void @no_optimize_atomic(
+; CHECK-SAME: i32 [[BLOCK_SIZE:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[FOO_ARR:%.*]] = alloca [2 x i8], align 16
+; CHECK-NEXT: [[BAR_ARR:%.*]] = alloca [2 x i8], align 16
+; CHECK-NEXT: call void @x(ptr nonnull [[FOO_ARR]])
+; CHECK-NEXT: [[CMP14_NOT:%.*]] = icmp eq i32 [[BLOCK_SIZE]], 0
+; CHECK-NEXT: br i1 [[CMP14_NOT]], label %[[FOR_COND_CLEANUP:.*]], label %[[FOR_BODY_PREHEADER:.*]]
+; CHECK: [[FOR_BODY_PREHEADER]]:
+; CHECK-NEXT: br label %[[FOR_BODY:.*]]
+; CHECK: [[FOR_COND_CLEANUP_LOOPEXIT:.*]]:
+; CHECK-NEXT: br label %[[FOR_COND_CLEANUP]]
+; CHECK: [[FOR_COND_CLEANUP]]:
+; CHECK-NEXT: call void @x(ptr nonnull [[BAR_ARR]])
+; CHECK-NEXT: ret void
+; CHECK: [[FOR_BODY]]:
+; CHECK-NEXT: [[I_015:%.*]] = phi i32 [ [[INC:%.*]], %[[IF_END4:.*]] ], [ 0, %[[FOR_BODY_PREHEADER]] ]
+; CHECK-NEXT: [[CMP1:%.*]] = icmp samesign ugt i32 [[I_015]], 2
+; CHECK-NEXT: br i1 [[CMP1]], label %[[IF_THEN:.*]], label %[[IF_END4]]
+; CHECK: [[IF_THEN]]:
+; CHECK-NEXT: call void @llvm.trap()
+; CHECK-NEXT: unreachable
+; CHECK: [[IF_END4]]:
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [1024 x i8], ptr [[FOO_ARR]], i64 0, i32 [[I_015]]
+; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[TMP4:%.*]] = xor i8 [[TMP3]], 54
+; CHECK-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds nuw [1025 x i8], ptr [[BAR_ARR]], i64 0, i32 [[I_015]]
+; CHECK-NEXT: store atomic i8 [[TMP4]], ptr [[ARRAYIDX7]] unordered, align 1
+; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_015]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[INC]], [[BLOCK_SIZE]]
+; CHECK-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_COND_CLEANUP_LOOPEXIT]]
+;
+entry:
+ %foo_arr = alloca [2 x i8], align 16
+ %bar_arr = alloca [2 x i8], align 16
+ call void @x(ptr nonnull %foo_arr)
+ %cmp14.not = icmp eq i32 %block_size, 0
+ br i1 %cmp14.not, label %for.cond.cleanup, label %for.body.preheader
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.cond.cleanup.loopexit: ; preds = %if.end4
+ br label %for.cond.cleanup
+
+for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry
+ call void @x(ptr nonnull %bar_arr)
+ ret void
+
+for.body: ; preds = %for.body.preheader, %if.end4
+ %i.015 = phi i32 [ %inc, %if.end4 ], [ 0, %for.body.preheader ]
+ %cmp1 = icmp samesign ugt i32 %i.015, 2
+ br i1 %cmp1, label %if.then, label %if.end4
+
+if.then: ; preds = %for.body
+ call void @llvm.trap()
+ unreachable
+
+if.end4: ; preds = %for.body
+ %arrayidx = getelementptr inbounds nuw [1024 x i8], ptr %foo_arr, i64 0, i32 %i.015
+ %0 = load i8, ptr %arrayidx, align 1
+ %1 = xor i8 %0, 54
+ %arrayidx7 = getelementptr inbounds nuw [1025 x i8], ptr %bar_arr, i64 0, i32 %i.015
+ store atomic i8 %1, ptr %arrayidx7 unordered, align 1
+ %inc = add nuw nsw i32 %i.015, 1
+ %cmp = icmp ult i32 %inc, %block_size
+ br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit
+}
+
+define void @no_optimize_volatile(i32 %block_size) {
+; CHECK-LABEL: define void @no_optimize_volatile(
+; CHECK-SAME: i32 [[BLOCK_SIZE:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[FOO_ARR:%.*]] = alloca [2 x i8], align 16
+; CHECK-NEXT: [[BAR_ARR:%.*]] = alloca [2 x i8], align 16
+; CHECK-NEXT: call void @x(ptr nonnull [[FOO_ARR]])
+; CHECK-NEXT: [[CMP14_NOT:%.*]] = icmp eq i32 [[BLOCK_SIZE]], 0
+; CHECK-NEXT: br i1 [[CMP14_NOT]], label %[[FOR_COND_CLEANUP:.*]], label %[[FOR_BODY_PREHEADER:.*]]
+; CHECK: [[FOR_BODY_PREHEADER]]:
+; CHECK-NEXT: br label %[[FOR_BODY:.*]]
+; CHECK: [[FOR_COND_CLEANUP_LOOPEXIT:.*]]:
+; CHECK-NEXT: br label %[[FOR_COND_CLEANUP]]
+; CHECK: [[FOR_COND_CLEANUP]]:
+; CHECK-NEXT: call void @x(ptr nonnull [[BAR_ARR]])
+; CHECK-NEXT: ret void
+; CHECK: [[FOR_BODY]]:
+; CHECK-NEXT: [[I_015:%.*]] = phi i32 [ [[INC:%.*]], %[[IF_END4:.*]] ], [ 0, %[[FOR_BODY_PREHEADER]] ]
+; CHECK-NEXT: [[CMP1:%.*]] = icmp samesign ugt i32 [[I_015]], 2
+; CHECK-NEXT: br i1 [[CMP1]], label %[[IF_THEN:.*]], label %[[IF_END4]]
+; CHECK: [[IF_THEN]]:
+; CHECK-NEXT: call void @llvm.trap()
+; CHECK-NEXT: unreachable
+; CHECK: [[IF_END4]]:
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [1024 x i8], ptr [[FOO_ARR]], i64 0, i32 [[I_015]]
+; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[TMP4:%.*]] = xor i8 [[TMP3]], 54
+; CHECK-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds nuw [1025 x i8], ptr [[BAR_ARR]], i64 0, i32 [[I_015]]
+; CHECK-NEXT: store volatile i8 [[TMP4]], ptr [[ARRAYIDX7]], align 1
+; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_015]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[INC]], [[BLOCK_SIZE]]
+; CHECK-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_COND_CLEANUP_LOOPEXIT]]
+;
+entry:
+ %foo_arr = alloca [2 x i8], align 16
+ %bar_arr = alloca [2 x i8], align 16
+ call void @x(ptr nonnull %foo_arr)
+ %cmp14.not = icmp eq i32 %block_size, 0
+ br i1 %cmp14.not, label %for.cond.cleanup, label %for.body.preheader
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.cond.cleanup.loopexit: ; preds = %if.end4
+ br label %for.cond.cleanup
+
+for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry
+ call void @x(ptr nonnull %bar_arr)
+ ret void
+
+for.body: ; preds = %for.body.preheader, %if.end4
+ %i.015 = phi i32 [ %inc, %if.end4 ], [ 0, %for.body.preheader ]
+ %cmp1 = icmp samesign ugt i32 %i.015, 2
+ br i1 %cmp1, label %if.then, label %if.end4
+
+if.then: ; preds = %for.body
+ call void @llvm.trap()
+ unreachable
+
+if.end4: ; preds = %for.body
+ %arrayidx = getelementptr inbounds nuw [1024 x i8], ptr %foo_arr, i64 0, i32 %i.015
+ %0 = load i8, ptr %arrayidx, align 1
+ %1 = xor i8 %0, 54
+ %arrayidx7 = getelementptr inbounds nuw [1025 x i8], ptr %bar_arr, i64 0, i32 %i.015
+ store volatile i8 %1, ptr %arrayidx7, align 1
+ %inc = add nuw nsw i32 %i.015, 1
+ %cmp = icmp ult i32 %inc, %block_size
+ br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit
+}
+
+define void @no_optimize_call(i32 %block_size) {
+; CHECK-LABEL: define void @no_optimize_call(
+; CHECK-SAME: i32 [[BLOCK_SIZE:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[FOO_ARR:%.*]] = alloca [2 x i8], align 16
+; CHECK-NEXT: [[BAR_ARR:%.*]] = alloca [2 x i8], align 16
+; CHECK-NEXT: call void @x(ptr nonnull [[FOO_ARR]])
+; CHECK-NEXT: [[CMP14_NOT:%.*]] = icmp eq i32 [[BLOCK_SIZE]], 0
+; CHECK-NEXT: br i1 [[CMP14_NOT]], label %[[FOR_COND_CLEANUP:.*]], label %[[FOR_BODY_PREHEADER:.*]]
+; CHECK: [[FOR_BODY_PREHEADER]]:
+; CHECK-NEXT: br label %[[FOR_BODY:.*]]
+; CHECK: [[FOR_COND_CLEANUP_LOOPEXIT:.*]]:
+; CHECK-NEXT: br label %[[FOR_COND_CLEANUP]]
+; CHECK: [[FOR_COND_CLEANUP]]:
+; CHECK-NEXT: call void @x(ptr nonnull [[BAR_ARR]])
+; CHECK-NEXT: ret void
+; CHECK: [[FOR_BODY]]:
+; CHECK-NEXT: [[I_015:%.*]] = phi i32 [ [[INC:%.*]], %[[IF_END4:.*]] ], [ 0, %[[FOR_BODY_PREHEADER]] ]
+; CHECK-NEXT: [[CMP1:%.*]] = icmp samesign ugt i32 [[I_015]], 2
+; CHECK-NEXT: br i1 [[CMP1]], label %[[IF_THEN:.*]], label %[[IF_END4]]
+; CHECK: [[IF_THEN]]:
+; CHECK-NEXT: call void @llvm.trap()
+; CHECK-NEXT: unreachable
+; CHECK: [[IF_END4]]:
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [1024 x i8], ptr [[FOO_ARR]], i64 0, i32 [[I_015]]
+; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[TMP4:%.*]] = xor i8 [[TMP3]], 54
+; CHECK-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds nuw [1025 x i8], ptr [[BAR_ARR]], i64 0, i32 [[I_015]]
+; CHECK-NEXT: call void @x(ptr null)
+; CHECK-NEXT: store volatile i8 [[TMP4]], ptr [[ARRAYIDX7]], align 1
+; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_015]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[INC]], [[BLOCK_SIZE]]
+; CHECK-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_COND_CLEANUP_LOOPEXIT]]
+;
+entry:
+ %foo_arr = alloca [2 x i8], align 16
+ %bar_arr = alloca [2 x i8], align 16
+ call void @x(ptr nonnull %foo_arr)
+ %cmp14.not = icmp eq i32 %block_size, 0
+ br i1 %cmp14.not, label %for.cond.cleanup, label %for.body.preheader
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.cond.cleanup.loopexit: ; preds = %if.end4
+ br label %for.cond.cleanup
+
+for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry
+ call void @x(ptr nonnull %bar_arr)
+ ret void
+
+for.body: ; preds = %for.body.preheader, %if.end4
+ %i.015 = phi i32 [ %inc, %if.end4 ], [ 0, %for.body.preheader ]
+ %cmp1 = icmp samesign ugt i32 %i.015, 2
+ br i1 %cmp1, label %if.then, label %if.end4
+
+if.then: ; preds = %for.body
+ call void @llvm.trap()
+ unreachable
+
+if.end4: ; preds = %for.body
+ %arrayidx = getelementptr inbounds nuw [1024 x i8], ptr %foo_arr, i64 0, i32 %i.015
+ %0 = load i8, ptr %arrayidx, align 1
+ %1 = xor i8 %0, 54
+ %arrayidx7 = getelementptr inbounds nuw [1025 x i8], ptr %bar_arr, i64 0, i32 %i.015
+ call void @x(ptr null)
+ store volatile i8 %1, ptr %arrayidx7, align 1
+ %inc = add nuw nsw i32 %i.015, 1
+ %cmp = icmp ult i32 %inc, %block_size
+ br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit
+}
+
+define void @optimize_ubsan_trap(i32 %block_size) {
+; CHECK-LABEL: define void @optimize_ubsan_trap(
+; CHECK-SAME: i32 [[BLOCK_SIZE:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[FOO_ARR:%.*]] = alloca [2 x i8], align 16
+; CHECK-NEXT: [[BAR_ARR:%.*]] = alloca [2 x i8], align 16
+; CHECK-NEXT: call void @x(ptr nonnull [[FOO_ARR]])
+; CHECK-NEXT: [[CMP14_NOT:%.*]] = icmp eq i32 [[BLOCK_SIZE]], 0
+; CHECK-NEXT: br i1 [[CMP14_NOT]], label %[[FOR_COND_CLEANUP:.*]], label %[[FOR_BODY_PREHEADER:.*]]
+; CHECK: [[FOR_BODY_PREHEADER]]:
+; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[BLOCK_SIZE]], -1
+; CHECK-NEXT: [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP1]], i32 3)
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 3, [[UMIN]]
+; CHECK-NEXT: br label %[[FOR_BODY:.*]]
+; CHECK: [[FOR_COND_CLEANUP_LOOPEXIT:.*]]:
+; CHECK-NEXT: br label %[[FOR_COND_CLEANUP]]
+; CHECK: [[FOR_COND_CLEANUP]]:
+; CHECK-NEXT: call void @x(ptr nonnull [[BAR_ARR]])
+; CHECK-NEXT: ret void
+; CHECK: [[FOR_BODY]]:
+; CHECK-NEXT: [[I_015:%.*]] = phi i32 [ [[INC:%.*]], %[[IF_END4:.*]] ], [ 0, %[[FOR_BODY_PREHEADER]] ]
+; CHECK-NEXT: br i1 [[TMP2]], label %[[IF_THEN:.*]], label %[[IF_END4]]
+; CHECK: [[IF_THEN]]:
+; CHECK-NEXT: call void @llvm.ubsantrap(i8 1)
+; CHECK-NEXT: unreachable
+; CHECK: [[IF_END4]]:
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [1024 x i8], ptr [[FOO_ARR]], i64 0, i32 [[I_015]]
+; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[TMP4:%.*]] = xor i8 [[TMP3]], 54
+; CHECK-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds nuw [1025 x i8], ptr [[BAR_ARR]], i64 0, i32 [[I_015]]
+; CHECK-NEXT: store i8 [[TMP4]], ptr [[ARRAYIDX7]], align 1
+; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_015]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[INC]], [[BLOCK_SIZE]]
+; CHECK-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_COND_CLEANUP_LOOPEXIT]]
+;
+entry:
+ %foo_arr = alloca [2 x i8], align 16
+ %bar_arr = alloca [2 x i8], align 16
+ call void @x(ptr nonnull %foo_arr)
+ %cmp14.not = icmp eq i32 %block_size, 0
+ br i1 %cmp14.not, label %for.cond.cleanup, label %for.body.preheader
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.cond.cleanup.loopexit: ; preds = %if.end4
+ br label %for.cond.cleanup
+
+for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry
+ call void @x(ptr nonnull %bar_arr)
+ ret void
+
+for.body: ; preds = %for.body.preheader, %if.end4
+ %i.015 = phi i32 [ %inc, %if.end4 ], [ 0, %for.body.preheader ]
+ %cmp1 = icmp samesign ugt i32 %i.015, 2
+ br i1 %cmp1, label %if.then, label %if.end4
+
+if.then: ; preds = %for.body
+ call void @llvm.ubsantrap(i8 1)
+ unreachable
+
+if.end4: ; preds = %for.body
+ %arrayidx = getelementptr inbounds nuw [1024 x i8], ptr %foo_arr, i64 0, i32 %i.015
+ %0 = load i8, ptr %arrayidx, align 1
+ %1 = xor i8 %0, 54
+ %arrayidx7 = getelementptr inbounds nuw [1025 x i8], ptr %bar_arr, i64 0, i32 %i.015
+ store i8 %1, ptr %arrayidx7, align 1
+ %inc = add nuw nsw i32 %i.015, 1
+ %cmp = icmp ult i32 %inc, %block_size
+ br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit
+}
+
+define void @no_optimize_arbitrary_call(i32 %block_size) {
+; CHECK-LABEL: define void @no_optimize_arbitrary_call(
+; CHECK-SAME: i32 [[BLOCK_SIZE:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[FOO_ARR:%.*]] = alloca [2 x i8], align 16
+; CHECK-NEXT: [[BAR_ARR:%.*]] = alloca [2 x i8], align 16
+; CHECK-NEXT: call void @x(ptr nonnull [[FOO_ARR]])
+; CHECK-NEXT: [[CMP14_NOT:%.*]] = icmp eq i32 [[BLOCK_SIZE]], 0
+; CHECK-NEXT: br i1 [[CMP14_NOT]], label %[[FOR_COND_CLEANUP:.*]], label %[[FOR_BODY_PREHEADER:.*]]
+; CHECK: [[FOR_BODY_PREHEADER]]:
+; CHECK-NEXT: br label %[[FOR_BODY:.*]]
+; CHECK: [[FOR_COND_CLEANUP_LOOPEXIT:.*]]:
+; CHECK-NEXT: br label %[[FOR_COND_CLEANUP]]
+; CHECK: [[FOR_COND_CLEANUP]]:
+; CHECK-NEXT: call void @x(ptr nonnull [[BAR_ARR]])
+; CHECK-NEXT: ret void
+; CHECK: [[FOR_BODY]]:
+; CHECK-NEXT: [[I_015:%.*]] = phi i32 [ [[INC:%.*]], %[[IF_END4:.*]] ], [ 0, %[[FOR_BODY_PREHEADER]] ]
+; CHECK-NEXT: [[CMP1:%.*]] = icmp samesign ugt i32 [[I_015]], 2
+; CHECK-NEXT: br i1 [[CMP1]], label %[[IF_THEN:.*]], label %[[IF_END4]]
+; CHECK: [[IF_THEN]]:
+; CHECK-NEXT: call void @noreturn_with_argmem(ptr [[FOO_ARR]])
+; CHECK-NEXT: unreachable
+; CHECK: [[IF_END4]]:
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [1024 x i8], ptr [[FOO_ARR]], i64 0, i32 [[I_015]]
+; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[TMP4:%.*]] = xor i8 [[TMP3]], 54
+; CHECK-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds nuw [1025 x i8], ptr [[BAR_ARR]], i64 0, i32 [[I_015]]
+; CHECK-NEXT: store i8 [[TMP4]], ptr [[ARRAYIDX7]], align 1
+; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_015]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[INC]], [[BLOCK_SIZE]]
+; CHECK-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_COND_CLEANUP_LOOPEXIT]]
+;
+entry:
+ %foo_arr = alloca [2 x i8], align 16
+ %bar_arr = alloca [2 x i8], align 16
+ call void @x(ptr nonnull %foo_arr)
+ %cmp14.not = icmp eq i32 %block_size, 0
+ br i1 %cmp14.not, label %for.cond.cleanup, label %for.body.preheader
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.cond.cleanup.loopexit: ; preds = %if.end4
+ br label %for.cond.cleanup
+
+for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry
+ call void @x(ptr nonnull %bar_arr)
+ ret void
+
+for.body: ; preds = %for.body.preheader, %if.end4
+ %i.015 = phi i32 [ %inc, %if.end4 ], [ 0, %for.body.preheader ]
+ %cmp1 = icmp samesign ugt i32 %i.015, 2
+ br i1 %cmp1, label %if.then, label %if.end4
+
+if.then: ; preds = %for.body
+ call void @noreturn_with_argmem(ptr %foo_arr)
+ unreachable
+
+if.end4: ; preds = %for.body
+ %arrayidx = getelementptr inbounds nuw [1024 x i8], ptr %foo_arr, i64 0, i32 %i.015
+ %0 = load i8, ptr %arrayidx, align 1
+ %1 = xor i8 %0, 54
+ %arrayidx7 = getelementptr inbounds nuw [1025 x i8], ptr %bar_arr, i64 0, i32 %i.015
+ store i8 %1, ptr %arrayidx7, align 1
+ %inc = add nuw nsw i32 %i.015, 1
+ %cmp = icmp ult i32 %inc, %block_size
+ br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit
+}
+
+define void @no_optimize_two_exits(i32 %block_size) {
+; CHECK-LABEL: define void @no_optimize_two_exits(
+; CHECK-SAME: i32 [[BLOCK_SIZE:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[FOO_ARR:%.*]] = alloca [2 x i8], align 16
+; CHECK-NEXT: [[BAR_ARR:%.*]] = alloca [2 x i8], align 16
+; CHECK-NEXT: call void @x(ptr nonnull [[FOO_ARR]])
+; CHECK-NEXT: [[CMP14_NOT:%.*]] = icmp eq i32 [[BLOCK_SIZE]], 0
+; CHECK-NEXT: br i1 [[CMP14_NOT]], label %[[FOR_COND_CLEANUP:.*]], label %[[FOR_BODY_PREHEADER:.*]]
+; CHECK: [[FOR_BODY_PREHEADER]]:
+; CHECK-NEXT: br label %[[FOR_BODY:.*]]
+; CHECK: [[FOR_COND_CLEANUP_LOOPEXIT:.*]]:
+; CHECK-NEXT: br label %[[FOR_COND_CLEANUP]]
+; CHECK: [[FOR_COND_CLEANUP]]:
+; CHECK-NEXT: call void @x(ptr nonnull [[BAR_ARR]])
+; CHECK-NEXT: ret void
+; CHECK: [[FOR_BODY]]:
+; CHECK-NEXT: [[I_015:%.*]] = phi i32 [ [[INC:%.*]], %[[IF_END4:.*]] ], [ 0, %[[FOR_BODY_PREHEADER]] ]
+; CHECK-NEXT: [[P:%.*]] = call i1 @pred()
+; CHECK-NEXT: br i1 [[P]], label %[[FOR_BODY_CONT:.*]], label %[[FOR_COND_CLEANUP_LOOPEXIT]]
+; CHECK: [[FOR_BODY_CONT]]:
+; CHECK-NEXT: [[CMP1:%.*]] = icmp samesign ugt i32 [[I_015]], 2
+; CHECK-NEXT: br i1 [[CMP1]], label %[[IF_THEN:.*]], label %[[IF_END4]]
+; CHECK: [[IF_THEN]]:
+; CHECK-NEXT: call void @noreturn(ptr [[FOO_ARR]])
+; CHECK-NEXT: unreachable
+; CHECK: [[IF_END4]]:
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [1024 x i8], ptr [[FOO_ARR]], i64 0, i32 [[I_015]]
+; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[TMP0]], 54
+; CHECK-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds nuw [1025 x i8], ptr [[BAR_ARR]], i64 0, i32 [[I_015]]
+; CHECK-NEXT: store i8 [[TMP1]], ptr [[ARRAYIDX7]], align 1
+; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_015]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[INC]], [[BLOCK_SIZE]]
+; CHECK-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_COND_CLEANUP_LOOPEXIT]]
+;
+entry:
+ %foo_arr = alloca [2 x i8], align 16
+ %bar_arr = alloca [2 x i8], align 16
+ call void @x(ptr nonnull %foo_arr)
+ %cmp14.not = icmp eq i32 %block_size, 0
+ br i1 %cmp14.not, label %for.cond.cleanup, label %for.body.preheader
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.cond.cleanup.loopexit: ; preds = %if.end4
+ br label %for.cond.cleanup
+
+for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry
+ call void @x(ptr nonnull %bar_arr)
+ ret void
+
+for.body:
+ %i.015 = phi i32 [ %inc, %if.end4 ], [ 0, %for.body.preheader ]
+ %p = call i1 @pred()
+ br i1 %p, label %for.body.cont, label %for.cond.cleanup.loopexit
+
+for.body.cont: ; preds = %for.body.preheader, %if.end4
+ %cmp1 = icmp samesign ugt i32 %i.015, 2
+ br i1 %cmp1, label %if.then, label %if.end4
+
+if.then: ; preds = %for.body
+ call void @noreturn(ptr %foo_arr)
+ unreachable
+
+if.end4: ; preds = %for.body
+ %arrayidx = getelementptr inbounds nuw [1024 x i8], ptr %foo_arr, i64 0, i32 %i.015
+ %0 = load i8, ptr %arrayidx, align 1
+ %1 = xor i8 %0, 54
+ %arrayidx7 = getelementptr inbounds nuw [1025 x i8], ptr %bar_arr, i64 0, i32 %i.015
+ store i8 %1, ptr %arrayidx7, align 1
+ %inc = add nuw nsw i32 %i.015, 1
+ %cmp = icmp ult i32 %inc, %block_size
+ br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit
+}
+
+define void @no_optimize_two_exits2(i32 %block_size) {
+; CHECK-LABEL: define void @no_optimize_two_exits2(
+; CHECK-SAME: i32 [[BLOCK_SIZE:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[FOO_ARR:%.*]] = alloca [2 x i8], align 16
+; CHECK-NEXT: [[BAR_ARR:%.*]] = alloca [2 x i8], align 16
+; CHECK-NEXT: call void @x(ptr nonnull [[FOO_ARR]])
+; CHECK-NEXT: [[CMP14_NOT:%.*]] = icmp eq i32 [[BLOCK_SIZE]], 0
+; CHECK-NEXT: br i1 [[CMP14_NOT]], label %[[FOR_COND_CLEANUP:.*]], label %[[FOR_BODY_PREHEADER:.*]]
+; CHECK: [[FOR_BODY_PREHEADER]]:
+; CHECK-NEXT: br label %[[FOR_BODY:.*]]
+; CHECK: [[FOR_COND_CLEANUP_LOOPEXIT:.*]]:
+; CHECK-NEXT: br label %[[FOR_COND_CLEANUP]]
+; CHECK: [[FOR_COND_CLEANUP]]:
+; CHECK-NEXT: call void @x(ptr nonnull [[BAR_ARR]])
+; CHECK-NEXT: ret void
+; CHECK: [[FOR_BODY]]:
+; CHECK-NEXT: [[I_015:%.*]] = phi i32 [ [[INC:%.*]], %[[IF_END4:.*]] ], [ 0, %[[FOR_BODY_PREHEADER]] ]
+; CHECK-NEXT: [[CMP1:%.*]] = icmp samesign ugt i32 [[I_015]], 2
+; CHECK-NEXT: br i1 [[CMP1]], label %[[IF_THEN:.*]], label %[[FOR_BODY_CONT:.*]]
+; CHECK: [[FOR_BODY_CONT]]:
+; CHECK-NEXT: [[P:%.*]] = call i1 @pred()
+; CHECK-NEXT: br i1 [[P]], label %[[IF_END4]], label %[[FOR_COND_CLEANUP_LOOPEXIT]]
+; CHECK: [[IF_THEN]]:
+; CHECK-NEXT: call void @noreturn(ptr [[FOO_ARR]])
+; CHECK-NEXT: unreachable
+; CHECK: [[IF_END4]]:
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [1024 x i8], ptr [[FOO_ARR]], i64 0, i32 [[I_015]]
+; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[TMP0]], 54
+; CHECK-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds nuw [1025 x i8], ptr [[BAR_ARR]], i64 0, i32 [[I_015]]
+; CHECK-NEXT: store i8 [[TMP1]], ptr [[ARRAYIDX7]], align 1
+; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_015]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[INC]], [[BLOCK_SIZE]]
+; CHECK-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_COND_CLEANUP_LOOPEXIT]]
+;
+entry:
+ %foo_arr = alloca [2 x i8], align 16
+ %bar_arr = alloca [2 x i8], align 16
+ call void @x(ptr nonnull %foo_arr)
+ %cmp14.not = icmp eq i32 %block_size, 0
+ br i1 %cmp14.not, label %for.cond.cleanup, label %for.body.preheader
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.cond.cleanup.loopexit: ; preds = %if.end4
+ br label %for.cond.cleanup
+
+for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry
+ call void @x(ptr nonnull %bar_arr)
+ ret void
+
+for.body:
+ %i.015 = phi i32 [ %inc, %if.end4 ], [ 0, %for.body.preheader ]
+ %cmp1 = icmp samesign ugt i32 %i.015, 2
+ br i1 %cmp1, label %if.then, label %for.body.cont
+
+for.body.cont: ; preds = %for.body.preheader, %if.end4
+ %p = call i1 @pred()
+ br i1 %p, label %if.end4, label %for.cond.cleanup.loopexit
+
+if.then: ; preds = %for.body
+ call void @noreturn(ptr %foo_arr)
+ unreachable
+
+if.end4: ; preds = %for.body
+ %arrayidx = getelementptr inbounds nuw [1024 x i8], ptr %foo_arr, i64 0, i32 %i.015
+ %0 = load i8, ptr %arrayidx, align 1
+ %1 = xor i8 %0, 54
+ %arrayidx7 = getelementptr inbounds nuw [1025 x i8], ptr %bar_arr, i64 0, i32 %i.015
+ store i8 %1, ptr %arrayidx7, align 1
+ %inc = add nuw nsw i32 %i.015, 1
+ %cmp = icmp ult i32 %inc, %block_size
+ br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit
+}
+
+define void @no_optimize_dependent_ubsan_trap(i32 %block_size) {
+; CHECK-LABEL: define void @no_optimize_dependent_ubsan_trap(
+; CHECK-SAME: i32 [[BLOCK_SIZE:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[FOO_ARR:%.*]] = alloca [2 x i8], align 16
+; CHECK-NEXT: [[BAR_ARR:%.*]] = alloca [2 x i8], align 16
+; CHECK-NEXT: call void @x(ptr nonnull [[FOO_ARR]])
+; CHECK-NEXT: [[CMP14_NOT:%.*]] = icmp eq i32 [[BLOCK_SIZE]], 0
+; CHECK-NEXT: br i1 [[CMP14_NOT]], label %[[FOR_COND_CLEANUP:.*]], label %[[FOR_BODY_PREHEADER:.*]]
+; CHECK: [[FOR_BODY_PREHEADER]]:
+; CHECK-NEXT: br label %[[FOR_BODY:.*]]
+; CHECK: [[FOR_COND_CLEANUP_LOOPEXIT:.*]]:
+; CHECK-NEXT: br label %[[FOR_COND_CLEANUP]]
+; CHECK: [[FOR_COND_CLEANUP]]:
+; CHECK-NEXT: call void @x(ptr nonnull [[BAR_ARR]])
+; CHECK-NEXT: ret void
+; CHECK: [[FOR_BODY]]:
+; CHECK-NEXT: [[I_015:%.*]] = phi i32 [ [[INC:%.*]], %[[IF_END4:.*]] ], [ 0, %[[FOR_BODY_PREHEADER]] ]
+; CHECK-NEXT: [[CMP1:%.*]] = icmp samesign ugt i32 [[I_015]], 2
+; CHECK-NEXT: br i1 [[CMP1]], label %[[IF_THEN:.*]], label %[[IF_END4]]
+; CHECK: [[IF_THEN]]:
+; CHECK-NEXT: [[I_015_LCSSA:%.*]] = phi i32 [ [[I_015]], %[[FOR_BODY]] ]
+; CHECK-NEXT: call void @noreturn_with_i32(i32 [[I_015_LCSSA]])
+; CHECK-NEXT: unreachable
+; CHECK: [[IF_END4]]:
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [1024 x i8], ptr [[FOO_ARR]], i64 0, i32 [[I_015]]
+; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[TMP0]], 54
+; CHECK-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds nuw [1025 x i8], ptr [[BAR_ARR]], i64 0, i32 [[I_015]]
+; CHECK-NEXT: store i8 [[TMP1]], ptr [[ARRAYIDX7]], align 1
+; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_015]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[INC]], [[BLOCK_SIZE]]
+; CHECK-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_COND_CLEANUP_LOOPEXIT]]
+;
+entry:
+ %foo_arr = alloca [2 x i8], align 16
+ %bar_arr = alloca [2 x i8], align 16
+ call void @x(ptr nonnull %foo_arr)
+ %cmp14.not = icmp eq i32 %block_size, 0
+ br i1 %cmp14.not, label %for.cond.cleanup, label %for.body.preheader
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.cond.cleanup.loopexit: ; preds = %if.end4
+ br label %for.cond.cleanup
+
+for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry
+ call void @x(ptr nonnull %bar_arr)
+ ret void
+
+for.body: ; preds = %for.body.preheader, %if.end4
+ %i.015 = phi i32 [ %inc, %if.end4 ], [ 0, %for.body.preheader ]
+ %cmp1 = icmp samesign ugt i32 %i.015, 2
+ br i1 %cmp1, label %if.then, label %if.end4
+
+if.then: ; preds = %for.body
+ call void @noreturn_with_i32(i32 %i.015)
+ unreachable
+
+if.end4: ; preds = %for.body
+ %arrayidx = getelementptr inbounds nuw [1024 x i8], ptr %foo_arr, i64 0, i32 %i.015
+ %0 = load i8, ptr %arrayidx, align 1
+ %1 = xor i8 %0, 54
+ %arrayidx7 = getelementptr inbounds nuw [1025 x i8], ptr %bar_arr, i64 0, i32 %i.015
+ store i8 %1, ptr %arrayidx7, align 1
+ %inc = add nuw nsw i32 %i.015, 1
+ %cmp = icmp ult i32 %inc, %block_size
+ br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit
+}
+
+define void @no_optimize_dependent_load_trap(i32 %block_size) {
+; CHECK-LABEL: define void @no_optimize_dependent_load_trap(
+; CHECK-SAME: i32 [[BLOCK_SIZE:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[FOO_ARR:%.*]] = alloca [2 x i8], align 16
+; CHECK-NEXT: [[BAR_ARR:%.*]] = alloca [2 x i8], align 16
+; CHECK-NEXT: call void @x(ptr nonnull [[FOO_ARR]])
+; CHECK-NEXT: [[CMP14_NOT:%.*]] = icmp eq i32 [[BLOCK_SIZE]], 0
+; CHECK-NEXT: br i1 [[CMP14_NOT]], label %[[FOR_COND_CLEANUP:.*]], label %[[FOR_BODY_PREHEADER:.*]]
+; CHECK: [[FOR_BODY_PREHEADER]]:
+; CHECK-NEXT: br label %[[FOR_BODY:.*]]
+; CHECK: [[FOR_COND_CLEANUP_LOOPEXIT:.*]]:
+; CHECK-NEXT: br label %[[FOR_COND_CLEANUP]]
+; CHECK: [[FOR_COND_CLEANUP]]:
+; CHECK-NEXT: call void @x(ptr nonnull [[BAR_ARR]])
+; CHECK-NEXT: ret void
+; CHECK: [[FOR_BODY]]:
+; CHECK-NEXT: [[I_015:%.*]] = phi i32 [ [[INC:%.*]], %[[IF_END4:.*]] ], [ 0, %[[FOR_BODY_PREHEADER]] ]
+; CHECK-NEXT: [[CMP1:%.*]] = icmp samesign ugt i32 [[I_015]], 2
+; CHECK-NEXT: br i1 [[CMP1]], label %[[IF_THEN:.*]], label %[[IF_END4]]
+; CHECK: [[IF_THEN]]:
+; CHECK-NEXT: [[I_015_LCSSA:%.*]] = load i8, ptr [[FOO_ARR]], align 1
+; CHECK-NEXT: call void @noreturn_with_i8(i8 [[I_015_LCSSA]])
+; CHECK-NEXT: unreachable
+; CHECK: [[IF_END4]]:
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw [1024 x i8], ptr [[FOO_ARR]], i64 0, i32 [[I_015]]
+; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; CHECK-NEXT: [[TMP1:%.*]] = xor i8 [[TMP0]], 54
+; CHECK-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds nuw [1025 x i8], ptr [[BAR_ARR]], i64 0, i32 [[I_015]]
+; CHECK-NEXT: store i8 [[TMP1]], ptr [[ARRAYIDX7]], align 1
+; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_015]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[INC]], [[BLOCK_SIZE]]
+; CHECK-NEXT: br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_COND_CLEANUP_LOOPEXIT]]
+;
+entry:
+ %foo_arr = alloca [2 x i8], align 16
+ %bar_arr = alloca [2 x i8], align 16
+ call void @x(ptr nonnull %foo_arr)
+ %cmp14.not = icmp eq i32 %block_size, 0
+ br i1 %cmp14.not, label %for.cond.cleanup, label %for.body.preheader
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.cond.cleanup.loopexit: ; preds = %if.end4
+ br label %for.cond.cleanup
+
+for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry
+ call void @x(ptr nonnull %bar_arr)
+ ret void
+
+for.body: ; preds = %for.body.preheader, %if.end4
+ %i.015 = phi i32 [ %inc, %if.end4 ], [ 0, %for.body.preheader ]
+ %cmp1 = icmp samesign ugt i32 %i.015, 2
+ br i1 %cmp1, label %if.then, label %if.end4
+
+if.then: ; preds = %for.body
+ %r = load i8, ptr %foo_arr, align 1
+ call void @noreturn_with_i8(i8 %r)
+ unreachable
+
+if.end4: ; preds = %for.body
+ %arrayidx = getelementptr inbounds nuw [1024 x i8], ptr %foo_arr, i64 0, i32 %i.015
+ %0 = load i8, ptr %arrayidx, align 1
+ %1 = xor i8 %0, 54
+ %arrayidx7 = getelementptr inbounds nuw [1025 x i8], ptr %bar_arr, i64 0, i32 %i.015
+ store i8 %1, ptr %arrayidx7, align 1
+ %inc = add nuw nsw i32 %i.015, 1
+ %cmp = icmp ult i32 %inc, %block_size
+ br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit
+}
+
+
+declare void @x(ptr noundef) local_unnamed_addr
+declare i1 @pred() local_unnamed_addr
+
+declare void @llvm.trap() #0
+declare void @noreturn(ptr) #0
+declare void @noreturn_with_i32(i32) #0
+declare void @noreturn_with_i8(i8) #0
+declare void @noreturn_with_argmem(ptr) #1
+
+attributes #0 = { cold noreturn nounwind memory(inaccessiblemem: write) }
+attributes #1 = { cold noreturn nounwind memory(argmem: read) }