[llvm] [ValueTracking] Conservative nosync check prevents vectorization (PR #181345)
Kshitij Paranjape via llvm-commits
llvm-commits at lists.llvm.org
Sat Feb 14 07:47:37 PST 2026
https://github.com/kshitijvp updated https://github.com/llvm/llvm-project/pull/181345
From 8fc6d194fe35a4842297e5a825b2cd9100937143 Mon Sep 17 00:00:00 2001
From: Kshitij Paranjape <kshitijvparanjape at gmail.com>
Date: Fri, 13 Feb 2026 16:12:18 +0530
Subject: [PATCH 1/9] [ValueTracking] Conservative nosync check prevents
vectorization
---
llvm/lib/Analysis/ValueTracking.cpp | 18 ++-
.../Transforms/LoopVectorize/issue180180.ll | 133 ++++++++++++++++++
2 files changed, 146 insertions(+), 5 deletions(-)
create mode 100644 llvm/test/Transforms/LoopVectorize/issue180180.ll
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index 8761b7bcb51a2..37f7005fe4c3d 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -715,11 +715,19 @@ bool llvm::willNotFreeBetween(const Instruction *Assume,
}
return true;
};
-
+
// Make sure the current function cannot arrange for another thread to free on
// its behalf.
- if (!CtxI->getFunction()->hasNoSync())
- return false;
+ auto hasNoSyncCalls = [](auto Range) {
+ for (const auto &[Idx, I] : enumerate(Range)) {
+ if (Idx > MaxInstrsToCheckForFree)
+ return false;
+ if (const auto *CB = dyn_cast<CallBase>(&I))
+ if (!CB->hasFnAttr(Attribute::NoSync))
+ return false;
+ }
+ return true;
+ };
// Handle cross-block case: CtxI in a successor of Assume's block.
const BasicBlock *CtxBB = CtxI->getParent();
@@ -729,7 +737,7 @@ bool llvm::willNotFreeBetween(const Instruction *Assume,
if (CtxBB->getSinglePredecessor() != AssumeBB)
return false;
- if (!hasNoFreeCalls(make_range(CtxBB->begin(), CtxIter)))
+ if (!hasNoFreeCalls(make_range(CtxBB->begin(), CtxIter)) || !hasNoSyncCalls(make_range(CtxBB->begin(), CtxIter)))
return false;
CtxIter = AssumeBB->end();
@@ -741,7 +749,7 @@ bool llvm::willNotFreeBetween(const Instruction *Assume,
// Check if there are any calls between Assume and CtxIter that may free
// memory.
- return hasNoFreeCalls(make_range(Assume->getIterator(), CtxIter));
+ return hasNoFreeCalls(make_range(Assume->getIterator(), CtxIter)) && hasNoSyncCalls(make_range(Assume->getIterator(), CtxIter));
}
// TODO: cmpExcludesZero misses many cases where `RHS` is non-constant but
diff --git a/llvm/test/Transforms/LoopVectorize/issue180180.ll b/llvm/test/Transforms/LoopVectorize/issue180180.ll
new file mode 100644
index 0000000000000..b17acb67488cd
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/issue180180.ll
@@ -0,0 +1,133 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt %s -O3 -S | FileCheck %s
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+ at ptr = local_unnamed_addr global ptr null, align 8
+
+; Function Attrs: mustprogress norecurse nounwind ssp memory(read, inaccessiblemem: write, target_mem0: none, target_mem1: none) uwtable(sync)
+define i64 @notVectorizingTest(ptr noundef nonnull readonly align 8 captures(none) dereferenceable(24) %v, i32 noundef %n) local_unnamed_addr #0 {
+; CHECK-LABEL: define i64 @notVectorizingTest(
+; CHECK-SAME: ptr noundef nonnull readonly align 8 captures(none) dereferenceable(24) [[V:%.*]], i32 noundef [[N:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[V]], align 8
+; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP0]], i64 4) ]
+; CHECK-NEXT: [[__END__I:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 8
+; CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__END__I]], align 8
+; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP1]], i64 4) ]
+; CHECK-NEXT: [[COERCE_VAL_PI_I_I14:%.*]] = ptrtoint ptr [[TMP1]] to i64
+; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP0]], i64 4) ]
+; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP1]], i64 4) ]
+; CHECK-NEXT: [[CMP_I_I7_NOT11_I:%.*]] = icmp eq ptr [[TMP0]], [[TMP1]]
+; CHECK-NEXT: br i1 [[CMP_I_I7_NOT11_I]], label %[[BR1:.*]], label %[[FOR_BODY_I_PREHEADER:.*]]
+; CHECK: [[FOR_BODY_I_PREHEADER]]:
+; CHECK-NEXT: [[COERCE_VAL_PI_I_I:%.*]] = ptrtoint ptr [[TMP0]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[COERCE_VAL_PI_I_I14]], -4
+; CHECK-NEXT: [[TMP3:%.*]] = sub i64 [[TMP2]], [[COERCE_VAL_PI_I_I]]
+; CHECK-NEXT: [[TMP4:%.*]] = lshr exact i64 [[TMP3]], 2
+; CHECK-NEXT: [[TMP5:%.*]] = add nuw nsw i64 [[TMP4]], 1
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 12
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[FOR_BODY_I_PREHEADER6:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP5]], 9223372036854775804
+; CHECK-NEXT: [[TMP6:%.*]] = shl i64 [[N_VEC]], 2
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP0]], i64 [[TMP6]]
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[N]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY_INTERIM:.*]] ]
+; CHECK-NEXT: [[OFFSET_IDX:%.*]] = shl i64 [[INDEX]], 2
+; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[TMP0]], i64 [[OFFSET_IDX]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[NEXT_GEP]], align 4
+; CHECK-NEXT: [[WIDE_LOAD_FR:%.*]] = freeze <4 x i32> [[WIDE_LOAD]]
+; CHECK-NEXT: [[TMP8:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD_FR]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x i1> [[TMP8]] to i4
+; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i4 [[TMP9]], 0
+; CHECK-NEXT: br i1 [[DOTNOT]], label %[[VECTOR_BODY_INTERIM]], label %[[VECTOR_EARLY_EXIT:.*]]
+; CHECK: [[VECTOR_BODY_INTERIM]]:
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP5]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[CMP_N]], label %[[BR1]], label %[[FOR_BODY_I_PREHEADER6]]
+; CHECK: [[FOR_BODY_I_PREHEADER6]]:
+; CHECK-NEXT: [[__FIRST_SROA_0_012_I_PH:%.*]] = phi ptr [ [[TMP0]], %[[FOR_BODY_I_PREHEADER]] ], [ [[TMP7]], %[[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: br label %[[FOR_BODY_I:.*]]
+; CHECK: [[VECTOR_EARLY_EXIT]]:
+; CHECK-NEXT: [[TMP11:%.*]] = tail call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP8]], i1 false)
+; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], [[TMP11]]
+; CHECK-NEXT: [[TMP13:%.*]] = shl i64 [[TMP12]], 2
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[TMP0]], i64 [[TMP13]]
+; CHECK-NEXT: br label %[[FOR_BODY_FOR_END_LOOPEXIT_CRIT_EDGE_I:.*]]
+; CHECK: [[FOR_BODY_I]]:
+; CHECK-NEXT: [[__FIRST_SROA_0_012_I:%.*]] = phi ptr [ [[INCDEC_PTR_I_I:%.*]], %[[FOR_INC_I:.*]] ], [ [[__FIRST_SROA_0_012_I_PH]], %[[FOR_BODY_I_PREHEADER6]] ]
+; CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[__FIRST_SROA_0_012_I]], align 4
+; CHECK-NEXT: [[CMP_I_I:%.*]] = icmp slt i32 [[TMP15]], [[N]]
+; CHECK-NEXT: br i1 [[CMP_I_I]], label %[[FOR_BODY_FOR_END_LOOPEXIT_CRIT_EDGE_I]], label %[[FOR_INC_I]]
+; CHECK: [[FOR_BODY_FOR_END_LOOPEXIT_CRIT_EDGE_I]]:
+; CHECK-NEXT: [[__FIRST_SROA_0_012_I_LCSSA:%.*]] = phi ptr [ [[TMP14]], %[[VECTOR_EARLY_EXIT]] ], [ [[__FIRST_SROA_0_012_I]], %[[FOR_BODY_I]] ]
+; CHECK-NEXT: [[DOTPRE14_I:%.*]] = ptrtoint ptr [[__FIRST_SROA_0_012_I_LCSSA]] to i64
+; CHECK-NEXT: br label %[[BR1]]
+; CHECK: [[FOR_INC_I]]:
+; CHECK-NEXT: [[INCDEC_PTR_I_I]] = getelementptr inbounds nuw i8, ptr [[__FIRST_SROA_0_012_I]], i64 4
+; CHECK-NEXT: [[CMP_I_I7_NOT_I:%.*]] = icmp eq ptr [[INCDEC_PTR_I_I]], [[TMP1]]
+; CHECK-NEXT: br i1 [[CMP_I_I7_NOT_I]], label %[[BR1]], label %[[FOR_BODY_I]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK: [[BR1]]:
+; CHECK-NEXT: [[COERCE_VAL_PI_PRE_PHI_I:%.*]] = phi i64 [ [[COERCE_VAL_PI_I_I14]], %[[ENTRY]] ], [ [[DOTPRE14_I]], %[[FOR_BODY_FOR_END_LOOPEXIT_CRIT_EDGE_I]] ], [ [[COERCE_VAL_PI_I_I14]], %[[MIDDLE_BLOCK]] ], [ [[COERCE_VAL_PI_I_I14]], %[[FOR_INC_I]] ]
+; CHECK-NEXT: ret i64 [[COERCE_VAL_PI_PRE_PHI_I]]
+;
+entry:
+ %0 = load ptr, ptr %v, align 8
+ call void @llvm.assume(i1 true) [ "align"(ptr %0, i64 4) ]
+ %coerce.val.pi.i.i = ptrtoint ptr %0 to i64
+ %__end_.i = getelementptr inbounds nuw i8, ptr %v, i64 8
+ %1 = load ptr, ptr %__end_.i, align 8
+ call void @llvm.assume(i1 true) [ "align"(ptr %1, i64 4) ]
+ %coerce.val.pi.i.i14 = ptrtoint ptr %1 to i64
+ %sub.ptr.sub.i.i.i = sub i64 %coerce.val.pi.i.i14, %coerce.val.pi.i.i
+ call void @llvm.assume(i1 true) [ "dereferenceable"(ptr %0, i64 %sub.ptr.sub.i.i.i) ]
+ call void @llvm.assume(i1 true) [ "align"(ptr %0, i64 4) ]
+ call void @llvm.assume(i1 true) [ "align"(ptr %1, i64 4) ]
+ %cmp.i.i7.not11.i = icmp eq ptr %0, %1
+ br i1 %cmp.i.i7.not11.i, label %br1, label %for.body.i.preheader
+
+for.body.i.preheader: ; preds = %entry
+ br label %for.body.i
+
+for.body.i: ; preds = %for.body.i.preheader, %for.inc.i
+ %__first.sroa.0.012.i = phi ptr [ %incdec.ptr.i.i, %for.inc.i ], [ %0, %for.body.i.preheader ]
+ %2 = load i32, ptr %__first.sroa.0.012.i, align 4
+ %cmp.i.i = icmp slt i32 %2, %n
+ br i1 %cmp.i.i, label %for.body.for.end.loopexit_crit_edge.i, label %for.inc.i
+
+for.body.for.end.loopexit_crit_edge.i: ; preds = %for.body.i
+ %__first.sroa.0.012.i.lcssa = phi ptr [ %__first.sroa.0.012.i, %for.body.i ]
+ %.pre14.i = ptrtoint ptr %__first.sroa.0.012.i.lcssa to i64
+ br label %br1
+
+for.inc.i: ; preds = %for.body.i
+ %incdec.ptr.i.i = getelementptr inbounds nuw i8, ptr %__first.sroa.0.012.i, i64 4
+ %cmp.i.i7.not.i = icmp eq ptr %incdec.ptr.i.i, %1
+ br i1 %cmp.i.i7.not.i, label %br2, label %for.body.i
+
+br2: ; preds = %for.inc.i
+ br label %br1
+
+br1: ; preds = %br2, %entry, %for.body.for.end.loopexit_crit_edge.i
+ %coerce.val.pi.pre-phi.i = phi i64 [ %coerce.val.pi.i.i14, %entry ], [ %.pre14.i, %for.body.for.end.loopexit_crit_edge.i ], [ %coerce.val.pi.i.i14, %br2 ]
+ ret i64 %coerce.val.pi.pre-phi.i
+}
+
+; Function Attrs: mustprogress nocallback nounwind willreturn memory(inaccessiblemem: write)
+declare void @llvm.assume(i1 noundef) #1
+
+attributes #0 = { mustprogress nounwind ssp memory(read, inaccessiblemem: write, target_mem0: none, target_mem1: none) uwtable(sync) "frame-pointer"="non-leaf-no-reserve" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #1 = { mustprogress nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) }
+;.
+; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
+; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
+; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
+; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
+;.
From e16296d0ccc16f5a9fa23fe2ebf192688eca2196 Mon Sep 17 00:00:00 2001
From: Kshitij Paranjape <kshitijvparanjape at gmail.com>
Date: Fri, 13 Feb 2026 16:22:06 +0530
Subject: [PATCH 2/9] nit
---
llvm/lib/Analysis/ValueTracking.cpp | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index 37f7005fe4c3d..7188df0da9fd3 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -715,9 +715,9 @@ bool llvm::willNotFreeBetween(const Instruction *Assume,
}
return true;
};
-
- // Make sure the current function cannot arrange for another thread to free on
- // its behalf.
+
+ // Helper to make sure the current function cannot arrange for
+ // another thread to free on its behalf.
auto hasNoSyncCalls = [](auto Range) {
for (const auto &[Idx, I] : enumerate(Range)) {
if (Idx > MaxInstrsToCheckForFree)
From 3e98c0b0535061482a926e8900e42cf41e1a3e36 Mon Sep 17 00:00:00 2001
From: Kshitij Paranjape <kshitijvparanjape at gmail.com>
Date: Fri, 13 Feb 2026 16:24:40 +0530
Subject: [PATCH 3/9] nit
---
llvm/lib/Analysis/ValueTracking.cpp | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index 7188df0da9fd3..92f673566edde 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -737,7 +737,8 @@ bool llvm::willNotFreeBetween(const Instruction *Assume,
if (CtxBB->getSinglePredecessor() != AssumeBB)
return false;
- if (!hasNoFreeCalls(make_range(CtxBB->begin(), CtxIter)) || !hasNoSyncCalls(make_range(CtxBB->begin(), CtxIter)))
+ if (!hasNoFreeCalls(make_range(CtxBB->begin(), CtxIter)) ||
+ !hasNoSyncCalls(make_range(CtxBB->begin(), CtxIter)))
return false;
CtxIter = AssumeBB->end();
@@ -749,7 +750,8 @@ bool llvm::willNotFreeBetween(const Instruction *Assume,
// Check if there are any calls between Assume and CtxIter that may free
// memory.
- return hasNoFreeCalls(make_range(Assume->getIterator(), CtxIter)) && hasNoSyncCalls(make_range(Assume->getIterator(), CtxIter));
+ return hasNoFreeCalls(make_range(Assume->getIterator(), CtxIter)) &&
+ hasNoSyncCalls(make_range(Assume->getIterator(), CtxIter));
}
// TODO: cmpExcludesZero misses many cases where `RHS` is non-constant but
From 77b28d9e6747446525377778f2b1e1ea7ee13c4f Mon Sep 17 00:00:00 2001
From: Kshitij Paranjape <kshitijvparanjape at gmail.com>
Date: Fri, 13 Feb 2026 16:28:36 +0530
Subject: [PATCH 4/9] nitpick
---
llvm/lib/Analysis/ValueTracking.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index 92f673566edde..b3d159e4ff43e 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -716,7 +716,7 @@ bool llvm::willNotFreeBetween(const Instruction *Assume,
return true;
};
- // Helper to make sure the current function cannot arrange for
+ // Helper to make sure the current function cannot arrange for
// another thread to free on its behalf.
auto hasNoSyncCalls = [](auto Range) {
for (const auto &[Idx, I] : enumerate(Range)) {
From ff0f1335be44919bfa1681368202f6cc8deb21aa Mon Sep 17 00:00:00 2001
From: Kshitij Paranjape <kshitijvparanjape at gmail.com>
Date: Sat, 14 Feb 2026 01:56:41 +0530
Subject: [PATCH 5/9] Fixed hasNoSync logic and added negative tests
Added checks in the NoSync lambda to account for other
synchronizing instructions such as fences, volatile
accesses, and ordered atomic instructions. Also added
negative tests that must not be vectorized because they
contain a synchronizing instruction between the assume
and the CtxI instruction.
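
To illustrate the pattern the negative tests exercise, here is a
minimal sketch (hypothetical reduced IR, not one of the added tests
verbatim): the fence between the assume and the load may synchronize,
so another thread could free the assumed-dereferenceable pointer, and
willNotFreeBetween must conservatively return false.

  define i32 @sketch(ptr %p) {
  entry:
    ; The assume establishes alignment and dereferenceability for %p.
    call void @llvm.assume(i1 true) [ "dereferenceable"(ptr %p, i64 4), "align"(ptr %p, i64 4) ]
    ; A synchronizing instruction between the assume and the context
    ; instruction invalidates the no-free reasoning.
    fence seq_cst
    %val = load i32, ptr %p, align 4
    ret i32 %val
  }

  declare void @llvm.assume(i1 noundef)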
---
llvm/lib/Analysis/ValueTracking.cpp | 49 +++-
.../Transforms/LoopVectorize/issue180180.ll | 219 ++++++++++++++++++
2 files changed, 261 insertions(+), 7 deletions(-)
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index b3d159e4ff43e..ba08abe4c6748 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -718,13 +718,49 @@ bool llvm::willNotFreeBetween(const Instruction *Assume,
// Helper to make sure the current function cannot arrange for
// another thread to free on its behalf.
- auto hasNoSyncCalls = [](auto Range) {
+ auto hasNoSync = [](auto Range) {
for (const auto &[Idx, I] : enumerate(Range)) {
if (Idx > MaxInstrsToCheckForFree)
return false;
- if (const auto *CB = dyn_cast<CallBase>(&I))
- if (!CB->hasFnAttr(Attribute::NoSync))
+ if (I.isVolatile()) {
+ return false;
+ }
+
+ auto isOrderedAtomic = [](const Instruction *Inst) {
+ if (!Inst->isAtomic())
return false;
+
+ if (auto *FI = dyn_cast<FenceInst>(Inst))
+ // All legal orderings for fence are stronger than monotonic.
+ return FI->getSyncScopeID() != SyncScope::SingleThread;
+ else if (isa<AtomicCmpXchgInst>(Inst) || isa<AtomicRMWInst>(Inst))
+ return true;
+ else if (auto *SI = dyn_cast<StoreInst>(Inst))
+ return !SI->isUnordered();
+ else if (auto *LI = dyn_cast<LoadInst>(Inst))
+ return !LI->isUnordered();
+ else {
+ llvm_unreachable("unknown atomic instruction?");
+ }
+ };
+ // An ordered atomic may synchronize.
+ if (isOrderedAtomic(&I)) {
+ return false;
+ }
+
+ auto *CB = dyn_cast<CallBase>(&I);
+ if (!CB)
+        // Non-call cases are covered by the two checks above.
+ continue;
+
+ if (CB->hasFnAttr(Attribute::NoSync))
+ continue;
+
+      // Non-volatile memset/memcpy/memmove are nosync.
+ if (auto *MI = dyn_cast<MemIntrinsic>(&I))
+ if (!MI->isVolatile())
+ continue;
+ return false;
}
return true;
};
@@ -738,8 +774,8 @@ bool llvm::willNotFreeBetween(const Instruction *Assume,
return false;
if (!hasNoFreeCalls(make_range(CtxBB->begin(), CtxIter)) ||
- !hasNoSyncCalls(make_range(CtxBB->begin(), CtxIter)))
- return false;
+ !hasNoSync(make_range(CtxBB->begin(), CtxIter)))
+ return false;
CtxIter = AssumeBB->end();
} else {
@@ -747,11 +783,10 @@ bool llvm::willNotFreeBetween(const Instruction *Assume,
if (!Assume->comesBefore(CtxI))
return false;
}
-
// Check if there are any calls between Assume and CtxIter that may free
// memory.
return hasNoFreeCalls(make_range(Assume->getIterator(), CtxIter)) &&
- hasNoSyncCalls(make_range(Assume->getIterator(), CtxIter));
+ hasNoSync(make_range(Assume->getIterator(), CtxIter));
}
// TODO: cmpExcludesZero misses many cases where `RHS` is non-constant but
diff --git a/llvm/test/Transforms/LoopVectorize/issue180180.ll b/llvm/test/Transforms/LoopVectorize/issue180180.ll
index b17acb67488cd..14352440f44ca 100644
--- a/llvm/test/Transforms/LoopVectorize/issue180180.ll
+++ b/llvm/test/Transforms/LoopVectorize/issue180180.ll
@@ -119,8 +119,227 @@ br1: ; preds = %br2, %entry, %for.body.for.end.loopexit_crit_edge.i
%coerce.val.pi.pre-phi.i = phi i64 [ %coerce.val.pi.i.i14, %entry ], [ %.pre14.i, %for.body.for.end.loopexit_crit_edge.i ], [ %coerce.val.pi.i.i14, %br2 ]
ret i64 %coerce.val.pi.pre-phi.i
}
+; Function Attrs: mustprogress nocallback nounwind willreturn memory(inaccessiblemem: write)
+
+; Function Attrs: mustprogress norecurse nounwind ssp memory(read, inaccessiblemem: write, target_mem0: none, target_mem1: none) uwtable(sync)
+define i64 @volatileVectorizingTest(ptr noundef nonnull readonly align 8 captures(none) dereferenceable(24) %v, i32 noundef %n) local_unnamed_addr #0 {
+; CHECK-LABEL: define i64 @volatileVectorizingTest(
+; CHECK-SAME: ptr noundef nonnull readonly align 8 captures(none) dereferenceable(24) [[V:%.*]], i32 noundef [[N:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[V]], align 8
+; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP0]], i64 4) ]
+; CHECK-NEXT: [[__END__I:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 8
+; CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__END__I]], align 8
+; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP1]], i64 4) ]
+; CHECK-NEXT: [[VOLATILE:%.*]] = load volatile ptr, ptr [[V]], align 8
+; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP0]], i64 4) ]
+; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP1]], i64 4) ]
+; CHECK-NEXT: [[CMP_I_I7_NOT11_I:%.*]] = icmp eq ptr [[TMP0]], [[TMP1]]
+; CHECK-NEXT: br i1 [[CMP_I_I7_NOT11_I]], label %[[BR1:.*]], label %[[FOR_BODY_I:.*]]
+; CHECK: [[FOR_BODY_I]]:
+; CHECK-NEXT: [[__FIRST_SROA_0_012_I:%.*]] = phi ptr [ [[INCDEC_PTR_I_I:%.*]], %[[FOR_INC_I:.*]] ], [ [[TMP0]], %[[ENTRY]] ]
+; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[__FIRST_SROA_0_012_I]], align 4
+; CHECK-NEXT: [[CMP_I_I:%.*]] = icmp slt i32 [[TMP2]], [[N]]
+; CHECK-NEXT: br i1 [[CMP_I_I]], label %[[BR1]], label %[[FOR_INC_I]]
+; CHECK: [[FOR_INC_I]]:
+; CHECK-NEXT: [[INCDEC_PTR_I_I]] = getelementptr inbounds nuw i8, ptr [[__FIRST_SROA_0_012_I]], i64 4
+; CHECK-NEXT: [[CMP_I_I7_NOT_I:%.*]] = icmp eq ptr [[INCDEC_PTR_I_I]], [[TMP1]]
+; CHECK-NEXT: br i1 [[CMP_I_I7_NOT_I]], label %[[BR1]], label %[[FOR_BODY_I]]
+; CHECK: [[BR1]]:
+; CHECK-NEXT: [[COERCE_VAL_PI_PRE_PHI_I_IN:%.*]] = phi ptr [ [[TMP1]], %[[ENTRY]] ], [ [[TMP1]], %[[FOR_INC_I]] ], [ [[__FIRST_SROA_0_012_I]], %[[FOR_BODY_I]] ]
+; CHECK-NEXT: [[COERCE_VAL_PI_PRE_PHI_I:%.*]] = ptrtoint ptr [[COERCE_VAL_PI_PRE_PHI_I_IN]] to i64
+; CHECK-NEXT: ret i64 [[COERCE_VAL_PI_PRE_PHI_I]]
+;
+entry:
+ %0 = load ptr, ptr %v, align 8
+ call void @llvm.assume(i1 true) [ "align"(ptr %0, i64 4) ]
+ %coerce.val.pi.i.i = ptrtoint ptr %0 to i64
+ %__end_.i = getelementptr inbounds nuw i8, ptr %v, i64 8
+ %1 = load ptr, ptr %__end_.i, align 8
+ call void @llvm.assume(i1 true) [ "align"(ptr %1, i64 4) ]
+ %coerce.val.pi.i.i14 = ptrtoint ptr %1 to i64
+ %sub.ptr.sub.i.i.i = sub i64 %coerce.val.pi.i.i14, %coerce.val.pi.i.i
+ call void @llvm.assume(i1 true) [ "dereferenceable"(ptr %0, i64 %sub.ptr.sub.i.i.i) ]
+ %volatile = load volatile ptr, ptr %v, align 8 ; Volatile Instruction
+ call void @llvm.assume(i1 true) [ "align"(ptr %0, i64 4) ]
+ call void @llvm.assume(i1 true) [ "align"(ptr %1, i64 4) ]
+ %cmp.i.i7.not11.i = icmp eq ptr %0, %1
+ br i1 %cmp.i.i7.not11.i, label %br1, label %for.body.i.preheader
+
+for.body.i.preheader: ; preds = %entry
+ br label %for.body.i
+
+for.body.i: ; preds = %for.body.i.preheader, %for.inc.i
+ %__first.sroa.0.012.i = phi ptr [ %incdec.ptr.i.i, %for.inc.i ], [ %0, %for.body.i.preheader ]
+ %2 = load i32, ptr %__first.sroa.0.012.i, align 4
+ %cmp.i.i = icmp slt i32 %2, %n
+ br i1 %cmp.i.i, label %for.body.for.end.loopexit_crit_edge.i, label %for.inc.i
+
+for.body.for.end.loopexit_crit_edge.i: ; preds = %for.body.i
+ %__first.sroa.0.012.i.lcssa = phi ptr [ %__first.sroa.0.012.i, %for.body.i ]
+ %.pre14.i = ptrtoint ptr %__first.sroa.0.012.i.lcssa to i64
+ br label %br1
+
+for.inc.i: ; preds = %for.body.i
+ %incdec.ptr.i.i = getelementptr inbounds nuw i8, ptr %__first.sroa.0.012.i, i64 4
+ %cmp.i.i7.not.i = icmp eq ptr %incdec.ptr.i.i, %1
+ br i1 %cmp.i.i7.not.i, label %br2, label %for.body.i
+
+br2: ; preds = %for.inc.i
+ br label %br1
+
+br1: ; preds = %br2, %entry, %for.body.for.end.loopexit_crit_edge.i
+ %coerce.val.pi.pre-phi.i = phi i64 [ %coerce.val.pi.i.i14, %entry ], [ %.pre14.i, %for.body.for.end.loopexit_crit_edge.i ], [ %coerce.val.pi.i.i14, %br2 ]
+ ret i64 %coerce.val.pi.pre-phi.i
+}
+; Function Attrs: mustprogress nocallback nounwind willreturn memory(inaccessiblemem: write)
+
+; Function Attrs: mustprogress norecurse nounwind ssp memory(read, inaccessiblemem: write, target_mem0: none, target_mem1: none) uwtable(sync)
+define i64 @fenceVectorizingTest(ptr noundef nonnull readonly align 8 captures(none) dereferenceable(24) %v, i32 noundef %n) local_unnamed_addr #0 {
+; CHECK-LABEL: define i64 @fenceVectorizingTest(
+; CHECK-SAME: ptr noundef nonnull readonly align 8 captures(none) dereferenceable(24) [[V:%.*]], i32 noundef [[N:%.*]]) local_unnamed_addr #[[ATTR1]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[V]], align 8
+; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP0]], i64 4) ]
+; CHECK-NEXT: [[__END__I:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 8
+; CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__END__I]], align 8
+; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP1]], i64 4) ]
+; CHECK-NEXT: fence seq_cst
+; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP0]], i64 4) ]
+; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP1]], i64 4) ]
+; CHECK-NEXT: [[CMP_I_I7_NOT11_I:%.*]] = icmp eq ptr [[TMP0]], [[TMP1]]
+; CHECK-NEXT: br i1 [[CMP_I_I7_NOT11_I]], label %[[BR1:.*]], label %[[FOR_BODY_I:.*]]
+; CHECK: [[FOR_BODY_I]]:
+; CHECK-NEXT: [[__FIRST_SROA_0_012_I:%.*]] = phi ptr [ [[INCDEC_PTR_I_I:%.*]], %[[FOR_INC_I:.*]] ], [ [[TMP0]], %[[ENTRY]] ]
+; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[__FIRST_SROA_0_012_I]], align 4
+; CHECK-NEXT: [[CMP_I_I:%.*]] = icmp slt i32 [[TMP2]], [[N]]
+; CHECK-NEXT: br i1 [[CMP_I_I]], label %[[BR1]], label %[[FOR_INC_I]]
+; CHECK: [[FOR_INC_I]]:
+; CHECK-NEXT: [[INCDEC_PTR_I_I]] = getelementptr inbounds nuw i8, ptr [[__FIRST_SROA_0_012_I]], i64 4
+; CHECK-NEXT: [[CMP_I_I7_NOT_I:%.*]] = icmp eq ptr [[INCDEC_PTR_I_I]], [[TMP1]]
+; CHECK-NEXT: br i1 [[CMP_I_I7_NOT_I]], label %[[BR1]], label %[[FOR_BODY_I]]
+; CHECK: [[BR1]]:
+; CHECK-NEXT: [[COERCE_VAL_PI_PRE_PHI_I_IN:%.*]] = phi ptr [ [[TMP1]], %[[ENTRY]] ], [ [[TMP1]], %[[FOR_INC_I]] ], [ [[__FIRST_SROA_0_012_I]], %[[FOR_BODY_I]] ]
+; CHECK-NEXT: [[COERCE_VAL_PI_PRE_PHI_I:%.*]] = ptrtoint ptr [[COERCE_VAL_PI_PRE_PHI_I_IN]] to i64
+; CHECK-NEXT: ret i64 [[COERCE_VAL_PI_PRE_PHI_I]]
+;
+entry:
+ %0 = load ptr, ptr %v, align 8
+ call void @llvm.assume(i1 true) [ "align"(ptr %0, i64 4) ]
+ %coerce.val.pi.i.i = ptrtoint ptr %0 to i64
+ %__end_.i = getelementptr inbounds nuw i8, ptr %v, i64 8
+ %1 = load ptr, ptr %__end_.i, align 8
+ call void @llvm.assume(i1 true) [ "align"(ptr %1, i64 4) ]
+ %coerce.val.pi.i.i14 = ptrtoint ptr %1 to i64
+ %sub.ptr.sub.i.i.i = sub i64 %coerce.val.pi.i.i14, %coerce.val.pi.i.i
+ call void @llvm.assume(i1 true) [ "dereferenceable"(ptr %0, i64 %sub.ptr.sub.i.i.i) ]
+ fence seq_cst ; Fence Instruction
+ call void @llvm.assume(i1 true) [ "align"(ptr %0, i64 4) ]
+ call void @llvm.assume(i1 true) [ "align"(ptr %1, i64 4) ]
+ %cmp.i.i7.not11.i = icmp eq ptr %0, %1
+ br i1 %cmp.i.i7.not11.i, label %br1, label %for.body.i.preheader
+
+for.body.i.preheader: ; preds = %entry
+ br label %for.body.i
+
+for.body.i: ; preds = %for.body.i.preheader, %for.inc.i
+ %__first.sroa.0.012.i = phi ptr [ %incdec.ptr.i.i, %for.inc.i ], [ %0, %for.body.i.preheader ]
+ %2 = load i32, ptr %__first.sroa.0.012.i, align 4
+ %cmp.i.i = icmp slt i32 %2, %n
+ br i1 %cmp.i.i, label %for.body.for.end.loopexit_crit_edge.i, label %for.inc.i
+
+for.body.for.end.loopexit_crit_edge.i: ; preds = %for.body.i
+ %__first.sroa.0.012.i.lcssa = phi ptr [ %__first.sroa.0.012.i, %for.body.i ]
+ %.pre14.i = ptrtoint ptr %__first.sroa.0.012.i.lcssa to i64
+ br label %br1
+
+for.inc.i: ; preds = %for.body.i
+ %incdec.ptr.i.i = getelementptr inbounds nuw i8, ptr %__first.sroa.0.012.i, i64 4
+ %cmp.i.i7.not.i = icmp eq ptr %incdec.ptr.i.i, %1
+ br i1 %cmp.i.i7.not.i, label %br2, label %for.body.i
+
+br2: ; preds = %for.inc.i
+ br label %br1
+
+br1: ; preds = %br2, %entry, %for.body.for.end.loopexit_crit_edge.i
+ %coerce.val.pi.pre-phi.i = phi i64 [ %coerce.val.pi.i.i14, %entry ], [ %.pre14.i, %for.body.for.end.loopexit_crit_edge.i ], [ %coerce.val.pi.i.i14, %br2 ]
+ ret i64 %coerce.val.pi.pre-phi.i
+}
+; Function Attrs: mustprogress nocallback nounwind willreturn memory(inaccessiblemem: write)
+
+; Function Attrs: mustprogress norecurse nounwind ssp memory(read, inaccessiblemem: write, target_mem0: none, target_mem1: none) uwtable(sync)
+define i64 @atomicVectorizingTest(ptr noundef nonnull readonly align 8 captures(none) dereferenceable(24) %v, i32 noundef %n) local_unnamed_addr #0 {
+; CHECK-LABEL: define i64 @atomicVectorizingTest(
+; CHECK-SAME: ptr noundef nonnull readonly align 8 captures(none) dereferenceable(24) [[V:%.*]], i32 noundef [[N:%.*]]) local_unnamed_addr #[[ATTR1]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[V]], align 8
+; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP0]], i64 4) ]
+; CHECK-NEXT: [[__END__I:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 8
+; CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__END__I]], align 8
+; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP1]], i64 4) ]
+; CHECK-NEXT: [[ATOMIC:%.*]] = load atomic ptr, ptr [[V]] seq_cst, align 8
+; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP0]], i64 4) ]
+; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP1]], i64 4) ]
+; CHECK-NEXT: [[CMP_I_I7_NOT11_I:%.*]] = icmp eq ptr [[TMP0]], [[TMP1]]
+; CHECK-NEXT: br i1 [[CMP_I_I7_NOT11_I]], label %[[BR1:.*]], label %[[FOR_BODY_I:.*]]
+; CHECK: [[FOR_BODY_I]]:
+; CHECK-NEXT: [[__FIRST_SROA_0_012_I:%.*]] = phi ptr [ [[INCDEC_PTR_I_I:%.*]], %[[FOR_INC_I:.*]] ], [ [[TMP0]], %[[ENTRY]] ]
+; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[__FIRST_SROA_0_012_I]], align 4
+; CHECK-NEXT: [[CMP_I_I:%.*]] = icmp slt i32 [[TMP2]], [[N]]
+; CHECK-NEXT: br i1 [[CMP_I_I]], label %[[BR1]], label %[[FOR_INC_I]]
+; CHECK: [[FOR_INC_I]]:
+; CHECK-NEXT: [[INCDEC_PTR_I_I]] = getelementptr inbounds nuw i8, ptr [[__FIRST_SROA_0_012_I]], i64 4
+; CHECK-NEXT: [[CMP_I_I7_NOT_I:%.*]] = icmp eq ptr [[INCDEC_PTR_I_I]], [[TMP1]]
+; CHECK-NEXT: br i1 [[CMP_I_I7_NOT_I]], label %[[BR1]], label %[[FOR_BODY_I]]
+; CHECK: [[BR1]]:
+; CHECK-NEXT: [[COERCE_VAL_PI_PRE_PHI_I_IN:%.*]] = phi ptr [ [[TMP1]], %[[ENTRY]] ], [ [[TMP1]], %[[FOR_INC_I]] ], [ [[__FIRST_SROA_0_012_I]], %[[FOR_BODY_I]] ]
+; CHECK-NEXT: [[COERCE_VAL_PI_PRE_PHI_I:%.*]] = ptrtoint ptr [[COERCE_VAL_PI_PRE_PHI_I_IN]] to i64
+; CHECK-NEXT: ret i64 [[COERCE_VAL_PI_PRE_PHI_I]]
+;
+entry:
+ %0 = load ptr, ptr %v, align 8
+ call void @llvm.assume(i1 true) [ "align"(ptr %0, i64 4) ]
+ %coerce.val.pi.i.i = ptrtoint ptr %0 to i64
+ %__end_.i = getelementptr inbounds nuw i8, ptr %v, i64 8
+ %1 = load ptr, ptr %__end_.i, align 8
+ call void @llvm.assume(i1 true) [ "align"(ptr %1, i64 4) ]
+ %coerce.val.pi.i.i14 = ptrtoint ptr %1 to i64
+ %sub.ptr.sub.i.i.i = sub i64 %coerce.val.pi.i.i14, %coerce.val.pi.i.i
+ call void @llvm.assume(i1 true) [ "dereferenceable"(ptr %0, i64 %sub.ptr.sub.i.i.i) ]
+ %atomic = load atomic ptr, ptr %v seq_cst, align 8 ; Atomic Instruction
+ call void @llvm.assume(i1 true) [ "align"(ptr %0, i64 4) ]
+ call void @llvm.assume(i1 true) [ "align"(ptr %1, i64 4) ]
+ %cmp.i.i7.not11.i = icmp eq ptr %0, %1
+ br i1 %cmp.i.i7.not11.i, label %br1, label %for.body.i.preheader
+for.body.i.preheader: ; preds = %entry
+ br label %for.body.i
+
+for.body.i: ; preds = %for.body.i.preheader, %for.inc.i
+ %__first.sroa.0.012.i = phi ptr [ %incdec.ptr.i.i, %for.inc.i ], [ %0, %for.body.i.preheader ]
+ %2 = load i32, ptr %__first.sroa.0.012.i, align 4
+ %cmp.i.i = icmp slt i32 %2, %n
+ br i1 %cmp.i.i, label %for.body.for.end.loopexit_crit_edge.i, label %for.inc.i
+
+for.body.for.end.loopexit_crit_edge.i: ; preds = %for.body.i
+ %__first.sroa.0.012.i.lcssa = phi ptr [ %__first.sroa.0.012.i, %for.body.i ]
+ %.pre14.i = ptrtoint ptr %__first.sroa.0.012.i.lcssa to i64
+ br label %br1
+
+for.inc.i: ; preds = %for.body.i
+ %incdec.ptr.i.i = getelementptr inbounds nuw i8, ptr %__first.sroa.0.012.i, i64 4
+ %cmp.i.i7.not.i = icmp eq ptr %incdec.ptr.i.i, %1
+ br i1 %cmp.i.i7.not.i, label %br2, label %for.body.i
+
+br2: ; preds = %for.inc.i
+ br label %br1
+
+br1: ; preds = %br2, %entry, %for.body.for.end.loopexit_crit_edge.i
+ %coerce.val.pi.pre-phi.i = phi i64 [ %coerce.val.pi.i.i14, %entry ], [ %.pre14.i, %for.body.for.end.loopexit_crit_edge.i ], [ %coerce.val.pi.i.i14, %br2 ]
+ ret i64 %coerce.val.pi.pre-phi.i
+}
; Function Attrs: mustprogress nocallback nounwind willreturn memory(inaccessiblemem: write)
+
declare void @llvm.assume(i1 noundef) #1
attributes #0 = { mustprogress nounwind ssp memory(read, inaccessiblemem: write, target_mem0: none, target_mem1: none) uwtable(sync) "frame-pointer"="non-leaf-no-reserve" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
From aa080e0fe08258460b394f0764dccbf6d49bcecf Mon Sep 17 00:00:00 2001
From: Kshitij Paranjape <kshitijvparanjape at gmail.com>
Date: Sat, 14 Feb 2026 02:19:36 +0530
Subject: [PATCH 6/9] nit
---
llvm/lib/Analysis/ValueTracking.cpp | 28 ++++++++--------------------
1 file changed, 8 insertions(+), 20 deletions(-)
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index ba08abe4c6748..476460b425b0f 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -704,21 +704,10 @@ bool llvm::isValidAssumeForContext(const Instruction *Inv,
bool llvm::willNotFreeBetween(const Instruction *Assume,
const Instruction *CtxI) {
- // Helper to check if there are any calls in the range that may free memory.
- auto hasNoFreeCalls = [](auto Range) {
- for (const auto &[Idx, I] : enumerate(Range)) {
- if (Idx > MaxInstrsToCheckForFree)
- return false;
- if (const auto *CB = dyn_cast<CallBase>(&I))
- if (!CB->hasFnAttr(Attribute::NoFree))
- return false;
- }
- return true;
- };
-
// Helper to make sure the current function cannot arrange for
- // another thread to free on its behalf.
- auto hasNoSync = [](auto Range) {
+ // another thread to free on its behalf and to check if there
+ // are any calls in the range that may free memory.
+ auto hasNoSyncOrFreeCall = [](auto Range) {
for (const auto &[Idx, I] : enumerate(Range)) {
if (Idx > MaxInstrsToCheckForFree)
return false;
@@ -743,6 +732,7 @@ bool llvm::willNotFreeBetween(const Instruction *Assume,
llvm_unreachable("unknown atomic instruction?");
}
};
+
// An ordered atomic may synchronize.
if (isOrderedAtomic(&I)) {
return false;
@@ -753,7 +743,7 @@ bool llvm::willNotFreeBetween(const Instruction *Assume,
// Non-call cases are covered by the two checks above.
continue;
- if (CB->hasFnAttr(Attribute::NoSync))
+ if (CB->hasFnAttr(Attribute::NoSync) || CB->hasFnAttr(Attribute::NoFree))
continue;
// Non-volatile memset/memcpy/memmove are nosync.
@@ -773,9 +763,8 @@ bool llvm::willNotFreeBetween(const Instruction *Assume,
if (CtxBB->getSinglePredecessor() != AssumeBB)
return false;
- if (!hasNoFreeCalls(make_range(CtxBB->begin(), CtxIter)) ||
- !hasNoSync(make_range(CtxBB->begin(), CtxIter)))
- return false;
+ if (!hasNoSyncOrFreeCall(make_range(CtxBB->begin(), CtxIter)))
+ return false;
CtxIter = AssumeBB->end();
} else {
@@ -785,8 +774,7 @@ bool llvm::willNotFreeBetween(const Instruction *Assume,
}
// Check if there are any calls between Assume and CtxIter that may free
// memory.
- return hasNoFreeCalls(make_range(Assume->getIterator(), CtxIter)) &&
- hasNoSync(make_range(Assume->getIterator(), CtxIter));
+ return hasNoSyncOrFreeCall(make_range(Assume->getIterator(), CtxIter));
}
// TODO: cmpExcludesZero misses many cases where `RHS` is non-constant but
From 780bf98dc5cccc47d0b2a5d0fe4c884eeae2dde6 Mon Sep 17 00:00:00 2001
From: Kshitij Paranjape <kshitijvparanjape at gmail.com>
Date: Sat, 14 Feb 2026 18:54:22 +0530
Subject: [PATCH 7/9] Shared isOrderedAtomic function
---
llvm/include/llvm/Analysis/ValueTracking.h | 7 +++
llvm/lib/Analysis/ValueTracking.cpp | 44 ++++++++++---------
llvm/lib/Transforms/IPO/FunctionAttrs.cpp | 24 +---------
...able-info-from-assumption-constant-size.ll | 28 +++---------
.../SimplifyCFG/speculate-derefable-load.ll | 19 +++-----
5 files changed, 42 insertions(+), 80 deletions(-)
diff --git a/llvm/include/llvm/Analysis/ValueTracking.h b/llvm/include/llvm/Analysis/ValueTracking.h
index 1cd88fd89aea2..03c5783e7f8f3 100644
--- a/llvm/include/llvm/Analysis/ValueTracking.h
+++ b/llvm/include/llvm/Analysis/ValueTracking.h
@@ -821,6 +821,13 @@ inline bool isGuaranteedNotToBePoison(const Value *V, AssumptionCache *AC,
return isGuaranteedNotToBePoison(V, AC, &*CtxI, DT, Depth);
}
+/// Return true if this is an atomic which has an ordering stronger than
+/// unordered. Note that this is different than the predicate we use in
+/// Attributor. Here we chose to be conservative and consider monotonic
+/// operations potentially synchronizing. We generally don't do much with
+/// monotonic operations, so this is simply risk reduction.
+LLVM_ABI bool isOrderedAtomic(const Instruction *I);
+
/// Returns true if V cannot be undef, but may be poison.
LLVM_ABI bool isGuaranteedNotToBeUndef(const Value *V,
AssumptionCache *AC = nullptr,
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index 476460b425b0f..61d6a672a4215 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -715,28 +715,9 @@ bool llvm::willNotFreeBetween(const Instruction *Assume,
return false;
}
- auto isOrderedAtomic = [](const Instruction *Inst) {
- if (!Inst->isAtomic())
- return false;
-
- if (auto *FI = dyn_cast<FenceInst>(Inst))
- // All legal orderings for fence are stronger than monotonic.
- return FI->getSyncScopeID() != SyncScope::SingleThread;
- else if (isa<AtomicCmpXchgInst>(Inst) || isa<AtomicRMWInst>(Inst))
- return true;
- else if (auto *SI = dyn_cast<StoreInst>(Inst))
- return !SI->isUnordered();
- else if (auto *LI = dyn_cast<LoadInst>(Inst))
- return !LI->isUnordered();
- else {
- llvm_unreachable("unknown atomic instruction?");
- }
- };
-
// An ordered atomic may synchronize.
- if (isOrderedAtomic(&I)) {
+ if (llvm::isOrderedAtomic(&I))
return false;
- }
auto *CB = dyn_cast<CallBase>(&I);
if (!CB)
@@ -7880,6 +7861,29 @@ bool llvm::isGuaranteedNotToBeUndef(const Value *V, AssumptionCache *AC,
UndefPoisonKind::UndefOnly);
}
+// Return true if this is an atomic which has an ordering stronger than
+// unordered. Note that this is different than the predicate we use in
+// Attributor. Here we chose to be conservative and consider monotonic
+// operations potentially synchronizing. We generally don't do much with
+// monotonic operations, so this is simply risk reduction.
+bool llvm::isOrderedAtomic(const Instruction *I) {
+ if (!I->isAtomic())
+ return false;
+
+ if (auto *FI = dyn_cast<FenceInst>(I))
+ // All legal orderings for fence are stronger than monotonic.
+ return FI->getSyncScopeID() != SyncScope::SingleThread;
+ else if (isa<AtomicCmpXchgInst>(I) || isa<AtomicRMWInst>(I))
+ return true;
+ else if (auto *SI = dyn_cast<StoreInst>(I))
+ return !SI->isUnordered();
+ else if (auto *LI = dyn_cast<LoadInst>(I))
+ return !LI->isUnordered();
+ else {
+ llvm_unreachable("unknown atomic instruction?");
+ }
+}
+
/// Return true if undefined behavior would provably be executed on the path to
/// OnPathTo if Root produced a posion result. Note that this doesn't say
/// anything about whether OnPathTo is actually executed or whether Root is
diff --git a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
index 855692db006f9..e661c099612b2 100644
--- a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -1909,28 +1909,6 @@ static bool InstrBreaksNoFree(Instruction &I, const SCCNodeSet &SCCNodes) {
return true;
}
-// Return true if this is an atomic which has an ordering stronger than
-// unordered. Note that this is different than the predicate we use in
-// Attributor. Here we chose to be conservative and consider monotonic
-// operations potentially synchronizing. We generally don't do much with
-// monotonic operations, so this is simply risk reduction.
-static bool isOrderedAtomic(Instruction *I) {
- if (!I->isAtomic())
- return false;
-
- if (auto *FI = dyn_cast<FenceInst>(I))
- // All legal orderings for fence are stronger than monotonic.
- return FI->getSyncScopeID() != SyncScope::SingleThread;
- else if (isa<AtomicCmpXchgInst>(I) || isa<AtomicRMWInst>(I))
- return true;
- else if (auto *SI = dyn_cast<StoreInst>(I))
- return !SI->isUnordered();
- else if (auto *LI = dyn_cast<LoadInst>(I))
- return !LI->isUnordered();
- else {
- llvm_unreachable("unknown atomic instruction?");
- }
-}
static bool InstrBreaksNoSync(Instruction &I, const SCCNodeSet &SCCNodes) {
// Volatile may synchronize
@@ -1938,7 +1916,7 @@ static bool InstrBreaksNoSync(Instruction &I, const SCCNodeSet &SCCNodes) {
return true;
// An ordered atomic may synchronize. (See comment about on monotonic.)
- if (isOrderedAtomic(&I))
+ if (llvm::isOrderedAtomic(&I))
return true;
auto *CB = dyn_cast<CallBase>(&I);
diff --git a/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-constant-size.ll b/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-constant-size.ll
index 1773b2aab7d87..43abda5d7774f 100644
--- a/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-constant-size.ll
+++ b/llvm/test/Transforms/LoopVectorize/dereferenceable-info-from-assumption-constant-size.ll
@@ -1326,31 +1326,13 @@ define void @deref_assumption_in_header_constant_trip_count_nofree_via_context_b
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE2:.*]] ]
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP0]], align 4
-; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <2 x i32> [[WIDE_LOAD]], zeroinitializer
-; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i1> [[TMP1]], i32 0
-; CHECK-NEXT: br i1 [[TMP2]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
-; CHECK: [[PRED_LOAD_IF]]:
-; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP3]]
-; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4
-; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x i32> poison, i32 [[TMP5]], i32 0
-; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE]]
-; CHECK: [[PRED_LOAD_CONTINUE]]:
-; CHECK-NEXT: [[TMP7:%.*]] = phi <2 x i32> [ poison, %[[VECTOR_BODY]] ], [ [[TMP6]], %[[PRED_LOAD_IF]] ]
-; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP1]], i32 1
-; CHECK-NEXT: br i1 [[TMP8]], label %[[PRED_LOAD_IF1:.*]], label %[[PRED_LOAD_CONTINUE2]]
-; CHECK: [[PRED_LOAD_IF1]]:
-; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 1
-; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[A]], i64 [[TMP9]]
-; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[TMP10]], align 4
-; CHECK-NEXT: [[TMP12:%.*]] = insertelement <2 x i32> [[TMP7]], i32 [[TMP11]], i32 1
-; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]]
-; CHECK: [[PRED_LOAD_CONTINUE2]]:
-; CHECK-NEXT: [[TMP13:%.*]] = phi <2 x i32> [ [[TMP7]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP12]], %[[PRED_LOAD_IF1]] ]
-; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP1]], <2 x i32> [[TMP13]], <2 x i32> [[WIDE_LOAD]]
+; CHECK-NEXT: [[TMP2:%.*]] = icmp sge <2 x i32> [[WIDE_LOAD]], zeroinitializer
+; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x i32>, ptr [[TMP1]], align 4
+; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP2]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[WIDE_LOAD1]]
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[INDEX]]
; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP14]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
diff --git a/llvm/test/Transforms/SimplifyCFG/speculate-derefable-load.ll b/llvm/test/Transforms/SimplifyCFG/speculate-derefable-load.ll
index b8c999d700aa7..693da976d1752 100644
--- a/llvm/test/Transforms/SimplifyCFG/speculate-derefable-load.ll
+++ b/llvm/test/Transforms/SimplifyCFG/speculate-derefable-load.ll
@@ -4,14 +4,10 @@
define i64 @align_deref_align(i1 %c, ptr %p) {
; CHECK-LABEL: define i64 @align_deref_align(
; CHECK-SAME: i1 [[C:%.*]], ptr [[P:%.*]]) {
-; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr [[P]], i64 8), "align"(ptr [[P]], i64 8) ]
-; CHECK-NEXT: br i1 [[C]], label %[[IF:.*]], label %[[EXIT:.*]]
-; CHECK: [[IF]]:
; CHECK-NEXT: [[V:%.*]] = load i64, ptr [[P]], align 8
-; CHECK-NEXT: br label %[[EXIT]]
-; CHECK: [[EXIT]]:
-; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[V]], %[[IF]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: [[RES:%.*]] = select i1 [[C]], i64 [[V]], i64 0
; CHECK-NEXT: ret i64 [[RES]]
;
entry:
@@ -52,17 +48,12 @@ exit:
define i64 @assume_deref_align2(i1 %c1, i32 %x, ptr %p) {
; CHECK-LABEL: define i64 @assume_deref_align2(
; CHECK-SAME: i1 [[C1:%.*]], i32 [[X:%.*]], ptr [[P:%.*]]) {
-; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr [[P]], i64 8), "align"(ptr [[P]], i64 8) ]
-; CHECK-NEXT: br i1 [[C1]], label %[[IF1:.*]], label %[[EXIT:.*]]
-; CHECK: [[IF1]]:
; CHECK-NEXT: [[C2:%.*]] = icmp ugt i32 [[X]], 10
-; CHECK-NEXT: br i1 [[C2]], label %[[IF2:.*]], label %[[EXIT]]
-; CHECK: [[IF2]]:
; CHECK-NEXT: [[V:%.*]] = load i64, ptr [[P]], align 8
-; CHECK-NEXT: br label %[[EXIT]]
-; CHECK: [[EXIT]]:
-; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[V]], %[[IF2]] ], [ 1, %[[IF1]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[C2]], i64 [[V]], i64 1
+; CHECK-NEXT: [[RES:%.*]] = select i1 [[C1]], i64 [[SPEC_SELECT]], i64 0
; CHECK-NEXT: ret i64 [[RES]]
;
entry:
From 45326fab2153ba7e0fab3281b2b073dfc498b041 Mon Sep 17 00:00:00 2001
From: Kshitij Paranjape <kshitijvparanjape at gmail.com>
Date: Sat, 14 Feb 2026 18:57:17 +0530
Subject: [PATCH 8/9] nit
---
llvm/lib/Transforms/IPO/FunctionAttrs.cpp | 1 -
1 file changed, 1 deletion(-)
diff --git a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
index e661c099612b2..b0bb2de9716b1 100644
--- a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -1909,7 +1909,6 @@ static bool InstrBreaksNoFree(Instruction &I, const SCCNodeSet &SCCNodes) {
return true;
}
-
static bool InstrBreaksNoSync(Instruction &I, const SCCNodeSet &SCCNodes) {
// Volatile may synchronize
if (I.isVolatile())
From 689c2823e69860860ffb3819ab8d87c0e6a1dada Mon Sep 17 00:00:00 2001
From: Kshitij Paranjape <kshitijvparanjape at gmail.com>
Date: Sat, 14 Feb 2026 21:17:15 +0530
Subject: [PATCH 9/9] Corrected hasNoSync logic
---
llvm/lib/Analysis/ValueTracking.cpp | 14 ++---
.../Transforms/LoopVectorize/issue180180.ll | 63 ++-----------------
2 files changed, 13 insertions(+), 64 deletions(-)
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index 61d6a672a4215..f2f638a70369d 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -720,17 +720,17 @@ bool llvm::willNotFreeBetween(const Instruction *Assume,
return false;
auto *CB = dyn_cast<CallBase>(&I);
- if (!CB)
+ if (CB) {
// Non-call cases are covered by the two checks above.
- continue;
-
- if (CB->hasFnAttr(Attribute::NoSync) || CB->hasFnAttr(Attribute::NoFree))
- continue;
+ if (!CB->hasFnAttr(Attribute::NoSync) || !CB->hasFnAttr(Attribute::NoFree))
+ return false;
+ }
      // Non-volatile memset/memcpy/memmove are nosync.
if (auto *MI = dyn_cast<MemIntrinsic>(&I))
- if (!MI->isVolatile())
- continue;
+ if (MI->isVolatile())
+ return false;
+
return false;
}
return true;
diff --git a/llvm/test/Transforms/LoopVectorize/issue180180.ll b/llvm/test/Transforms/LoopVectorize/issue180180.ll
index 14352440f44ca..dcc67f190b69f 100644
--- a/llvm/test/Transforms/LoopVectorize/issue180180.ll
+++ b/llvm/test/Transforms/LoopVectorize/issue180180.ll
@@ -15,67 +15,22 @@ define i64 @notVectorizingTest(ptr noundef nonnull readonly align 8 captures(non
; CHECK-NEXT: [[__END__I:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 8
; CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__END__I]], align 8
; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP1]], i64 4) ]
-; CHECK-NEXT: [[COERCE_VAL_PI_I_I14:%.*]] = ptrtoint ptr [[TMP1]] to i64
; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP0]], i64 4) ]
; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP1]], i64 4) ]
; CHECK-NEXT: [[CMP_I_I7_NOT11_I:%.*]] = icmp eq ptr [[TMP0]], [[TMP1]]
-; CHECK-NEXT: br i1 [[CMP_I_I7_NOT11_I]], label %[[BR1:.*]], label %[[FOR_BODY_I_PREHEADER:.*]]
-; CHECK: [[FOR_BODY_I_PREHEADER]]:
-; CHECK-NEXT: [[COERCE_VAL_PI_I_I:%.*]] = ptrtoint ptr [[TMP0]] to i64
-; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[COERCE_VAL_PI_I_I14]], -4
-; CHECK-NEXT: [[TMP3:%.*]] = sub i64 [[TMP2]], [[COERCE_VAL_PI_I_I]]
-; CHECK-NEXT: [[TMP4:%.*]] = lshr exact i64 [[TMP3]], 2
-; CHECK-NEXT: [[TMP5:%.*]] = add nuw nsw i64 [[TMP4]], 1
-; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 12
-; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[FOR_BODY_I_PREHEADER6:.*]], label %[[VECTOR_PH:.*]]
-; CHECK: [[VECTOR_PH]]:
-; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP5]], 9223372036854775804
-; CHECK-NEXT: [[TMP6:%.*]] = shl i64 [[N_VEC]], 2
-; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP0]], i64 [[TMP6]]
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[N]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
-; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY_INTERIM:.*]] ]
-; CHECK-NEXT: [[OFFSET_IDX:%.*]] = shl i64 [[INDEX]], 2
-; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[TMP0]], i64 [[OFFSET_IDX]]
-; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[NEXT_GEP]], align 4
-; CHECK-NEXT: [[WIDE_LOAD_FR:%.*]] = freeze <4 x i32> [[WIDE_LOAD]]
-; CHECK-NEXT: [[TMP8:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD_FR]], [[BROADCAST_SPLAT]]
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x i1> [[TMP8]] to i4
-; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i4 [[TMP9]], 0
-; CHECK-NEXT: br i1 [[DOTNOT]], label %[[VECTOR_BODY_INTERIM]], label %[[VECTOR_EARLY_EXIT:.*]]
-; CHECK: [[VECTOR_BODY_INTERIM]]:
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
-; CHECK: [[MIDDLE_BLOCK]]:
-; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP5]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[CMP_N]], label %[[BR1]], label %[[FOR_BODY_I_PREHEADER6]]
-; CHECK: [[FOR_BODY_I_PREHEADER6]]:
-; CHECK-NEXT: [[__FIRST_SROA_0_012_I_PH:%.*]] = phi ptr [ [[TMP0]], %[[FOR_BODY_I_PREHEADER]] ], [ [[TMP7]], %[[MIDDLE_BLOCK]] ]
-; CHECK-NEXT: br label %[[FOR_BODY_I:.*]]
-; CHECK: [[VECTOR_EARLY_EXIT]]:
-; CHECK-NEXT: [[TMP11:%.*]] = tail call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP8]], i1 false)
-; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], [[TMP11]]
-; CHECK-NEXT: [[TMP13:%.*]] = shl i64 [[TMP12]], 2
-; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[TMP0]], i64 [[TMP13]]
-; CHECK-NEXT: br label %[[FOR_BODY_FOR_END_LOOPEXIT_CRIT_EDGE_I:.*]]
+; CHECK-NEXT: br i1 [[CMP_I_I7_NOT11_I]], label %[[FOR_BODY_FOR_END_LOOPEXIT_CRIT_EDGE_I:.*]], label %[[FOR_BODY_I:.*]]
; CHECK: [[FOR_BODY_I]]:
-; CHECK-NEXT: [[__FIRST_SROA_0_012_I:%.*]] = phi ptr [ [[INCDEC_PTR_I_I:%.*]], %[[FOR_INC_I:.*]] ], [ [[__FIRST_SROA_0_012_I_PH]], %[[FOR_BODY_I_PREHEADER6]] ]
+; CHECK-NEXT: [[__FIRST_SROA_0_012_I:%.*]] = phi ptr [ [[INCDEC_PTR_I_I:%.*]], %[[FOR_INC_I:.*]] ], [ [[TMP0]], %[[ENTRY]] ]
; CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[__FIRST_SROA_0_012_I]], align 4
; CHECK-NEXT: [[CMP_I_I:%.*]] = icmp slt i32 [[TMP15]], [[N]]
; CHECK-NEXT: br i1 [[CMP_I_I]], label %[[FOR_BODY_FOR_END_LOOPEXIT_CRIT_EDGE_I]], label %[[FOR_INC_I]]
-; CHECK: [[FOR_BODY_FOR_END_LOOPEXIT_CRIT_EDGE_I]]:
-; CHECK-NEXT: [[__FIRST_SROA_0_012_I_LCSSA:%.*]] = phi ptr [ [[TMP14]], %[[VECTOR_EARLY_EXIT]] ], [ [[__FIRST_SROA_0_012_I]], %[[FOR_BODY_I]] ]
-; CHECK-NEXT: [[DOTPRE14_I:%.*]] = ptrtoint ptr [[__FIRST_SROA_0_012_I_LCSSA]] to i64
-; CHECK-NEXT: br label %[[BR1]]
; CHECK: [[FOR_INC_I]]:
; CHECK-NEXT: [[INCDEC_PTR_I_I]] = getelementptr inbounds nuw i8, ptr [[__FIRST_SROA_0_012_I]], i64 4
; CHECK-NEXT: [[CMP_I_I7_NOT_I:%.*]] = icmp eq ptr [[INCDEC_PTR_I_I]], [[TMP1]]
-; CHECK-NEXT: br i1 [[CMP_I_I7_NOT_I]], label %[[BR1]], label %[[FOR_BODY_I]], !llvm.loop [[LOOP3:![0-9]+]]
-; CHECK: [[BR1]]:
-; CHECK-NEXT: [[COERCE_VAL_PI_PRE_PHI_I:%.*]] = phi i64 [ [[COERCE_VAL_PI_I_I14]], %[[ENTRY]] ], [ [[DOTPRE14_I]], %[[FOR_BODY_FOR_END_LOOPEXIT_CRIT_EDGE_I]] ], [ [[COERCE_VAL_PI_I_I14]], %[[MIDDLE_BLOCK]] ], [ [[COERCE_VAL_PI_I_I14]], %[[FOR_INC_I]] ]
+; CHECK-NEXT: br i1 [[CMP_I_I7_NOT_I]], label %[[FOR_BODY_FOR_END_LOOPEXIT_CRIT_EDGE_I]], label %[[FOR_BODY_I]]
+; CHECK: [[FOR_BODY_FOR_END_LOOPEXIT_CRIT_EDGE_I]]:
+; CHECK-NEXT: [[COERCE_VAL_PI_PRE_PHI_I_IN:%.*]] = phi ptr [ [[TMP1]], %[[ENTRY]] ], [ [[TMP1]], %[[FOR_INC_I]] ], [ [[__FIRST_SROA_0_012_I]], %[[FOR_BODY_I]] ]
+; CHECK-NEXT: [[COERCE_VAL_PI_PRE_PHI_I:%.*]] = ptrtoint ptr [[COERCE_VAL_PI_PRE_PHI_I_IN]] to i64
; CHECK-NEXT: ret i64 [[COERCE_VAL_PI_PRE_PHI_I]]
;
entry:
@@ -344,9 +299,3 @@ declare void @llvm.assume(i1 noundef) #1
attributes #0 = { mustprogress nounwind ssp memory(read, inaccessiblemem: write, target_mem0: none, target_mem1: none) uwtable(sync) "frame-pointer"="non-leaf-no-reserve" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
attributes #1 = { mustprogress nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) }
-;.
-; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
-; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
-; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
-; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
-;.