[llvm] [ValueTracking] Conservative nosync check prevents vectorization (PR #181345)

Kshitij Paranjape via llvm-commits llvm-commits at lists.llvm.org
Fri Feb 13 12:30:16 PST 2026


https://github.com/kshitijvp updated https://github.com/llvm/llvm-project/pull/181345

>From 8fc6d194fe35a4842297e5a825b2cd9100937143 Mon Sep 17 00:00:00 2001
From: Kshitij Paranjape <kshitijvparanjape at gmail.com>
Date: Fri, 13 Feb 2026 16:12:18 +0530
Subject: [PATCH 1/5] [ValueTracking] Conservative nosync check prevents
 vectorization

---
 llvm/lib/Analysis/ValueTracking.cpp           |  18 ++-
 .../Transforms/LoopVectorize/issue180180.ll   | 133 ++++++++++++++++++
 2 files changed, 146 insertions(+), 5 deletions(-)
 create mode 100644 llvm/test/Transforms/LoopVectorize/issue180180.ll

diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index 8761b7bcb51a2..37f7005fe4c3d 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -715,11 +715,19 @@ bool llvm::willNotFreeBetween(const Instruction *Assume,
     }
     return true;
   };
-
+  
   // Make sure the current function cannot arrange for another thread to free on
   // its behalf.
-  if (!CtxI->getFunction()->hasNoSync())
-    return false;
+  auto hasNoSyncCalls = [](auto Range) {
+    for (const auto &[Idx, I] : enumerate(Range)) {
+      if (Idx > MaxInstrsToCheckForFree)
+        return false;
+      if (const auto *CB = dyn_cast<CallBase>(&I))
+        if (!CB->hasFnAttr(Attribute::NoSync))
+          return false;
+    }
+    return true;
+  };
 
   // Handle cross-block case: CtxI in a successor of Assume's block.
   const BasicBlock *CtxBB = CtxI->getParent();
@@ -729,7 +737,7 @@ bool llvm::willNotFreeBetween(const Instruction *Assume,
     if (CtxBB->getSinglePredecessor() != AssumeBB)
       return false;
 
-    if (!hasNoFreeCalls(make_range(CtxBB->begin(), CtxIter)))
+    if (!hasNoFreeCalls(make_range(CtxBB->begin(), CtxIter)) || !hasNoSyncCalls(make_range(CtxBB->begin(), CtxIter)))
       return false;
 
     CtxIter = AssumeBB->end();
@@ -741,7 +749,7 @@ bool llvm::willNotFreeBetween(const Instruction *Assume,
 
   // Check if there are any calls between Assume and CtxIter that may free
   // memory.
-  return hasNoFreeCalls(make_range(Assume->getIterator(), CtxIter));
+  return hasNoFreeCalls(make_range(Assume->getIterator(), CtxIter)) && hasNoSyncCalls(make_range(Assume->getIterator(), CtxIter));
 }
 
 // TODO: cmpExcludesZero misses many cases where `RHS` is non-constant but
diff --git a/llvm/test/Transforms/LoopVectorize/issue180180.ll b/llvm/test/Transforms/LoopVectorize/issue180180.ll
new file mode 100644
index 0000000000000..b17acb67488cd
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/issue180180.ll
@@ -0,0 +1,133 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt %s -O3 -debug-only=loop-vectorize -S | FileCheck %s
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+ at ptr = local_unnamed_addr global ptr null, align 8
+
+; Function Attrs: mustprogress norecurse nounwind ssp memory(read, inaccessiblemem: write, target_mem0: none, target_mem1: none) uwtable(sync)
+define i64 @notVectorizingTest(ptr noundef nonnull readonly align 8 captures(none) dereferenceable(24) %v, i32 noundef %n) local_unnamed_addr #0 {
+; CHECK-LABEL: define i64 @notVectorizingTest(
+; CHECK-SAME: ptr noundef nonnull readonly align 8 captures(none) dereferenceable(24) [[V:%.*]], i32 noundef [[N:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[V]], align 8
+; CHECK-NEXT:    call void @llvm.assume(i1 true) [ "align"(ptr [[TMP0]], i64 4) ]
+; CHECK-NEXT:    [[__END__I:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 8
+; CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[__END__I]], align 8
+; CHECK-NEXT:    call void @llvm.assume(i1 true) [ "align"(ptr [[TMP1]], i64 4) ]
+; CHECK-NEXT:    [[COERCE_VAL_PI_I_I14:%.*]] = ptrtoint ptr [[TMP1]] to i64
+; CHECK-NEXT:    call void @llvm.assume(i1 true) [ "align"(ptr [[TMP0]], i64 4) ]
+; CHECK-NEXT:    call void @llvm.assume(i1 true) [ "align"(ptr [[TMP1]], i64 4) ]
+; CHECK-NEXT:    [[CMP_I_I7_NOT11_I:%.*]] = icmp eq ptr [[TMP0]], [[TMP1]]
+; CHECK-NEXT:    br i1 [[CMP_I_I7_NOT11_I]], label %[[BR1:.*]], label %[[FOR_BODY_I_PREHEADER:.*]]
+; CHECK:       [[FOR_BODY_I_PREHEADER]]:
+; CHECK-NEXT:    [[COERCE_VAL_PI_I_I:%.*]] = ptrtoint ptr [[TMP0]] to i64
+; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[COERCE_VAL_PI_I_I14]], -4
+; CHECK-NEXT:    [[TMP3:%.*]] = sub i64 [[TMP2]], [[COERCE_VAL_PI_I_I]]
+; CHECK-NEXT:    [[TMP4:%.*]] = lshr exact i64 [[TMP3]], 2
+; CHECK-NEXT:    [[TMP5:%.*]] = add nuw nsw i64 [[TMP4]], 1
+; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 12
+; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[FOR_BODY_I_PREHEADER6:.*]], label %[[VECTOR_PH:.*]]
+; CHECK:       [[VECTOR_PH]]:
+; CHECK-NEXT:    [[N_VEC:%.*]] = and i64 [[TMP5]], 9223372036854775804
+; CHECK-NEXT:    [[TMP6:%.*]] = shl i64 [[N_VEC]], 2
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[TMP0]], i64 [[TMP6]]
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[N]], i64 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY_INTERIM:.*]] ]
+; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = shl i64 [[INDEX]], 2
+; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[TMP0]], i64 [[OFFSET_IDX]]
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[NEXT_GEP]], align 4
+; CHECK-NEXT:    [[WIDE_LOAD_FR:%.*]] = freeze <4 x i32> [[WIDE_LOAD]]
+; CHECK-NEXT:    [[TMP8:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD_FR]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <4 x i1> [[TMP8]] to i4
+; CHECK-NEXT:    [[DOTNOT:%.*]] = icmp eq i4 [[TMP9]], 0
+; CHECK-NEXT:    br i1 [[DOTNOT]], label %[[VECTOR_BODY_INTERIM]], label %[[VECTOR_EARLY_EXIT:.*]]
+; CHECK:       [[VECTOR_BODY_INTERIM]]:
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK:       [[MIDDLE_BLOCK]]:
+; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP5]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[CMP_N]], label %[[BR1]], label %[[FOR_BODY_I_PREHEADER6]]
+; CHECK:       [[FOR_BODY_I_PREHEADER6]]:
+; CHECK-NEXT:    [[__FIRST_SROA_0_012_I_PH:%.*]] = phi ptr [ [[TMP0]], %[[FOR_BODY_I_PREHEADER]] ], [ [[TMP7]], %[[MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    br label %[[FOR_BODY_I:.*]]
+; CHECK:       [[VECTOR_EARLY_EXIT]]:
+; CHECK-NEXT:    [[TMP11:%.*]] = tail call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP8]], i1 false)
+; CHECK-NEXT:    [[TMP12:%.*]] = add i64 [[INDEX]], [[TMP11]]
+; CHECK-NEXT:    [[TMP13:%.*]] = shl i64 [[TMP12]], 2
+; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr i8, ptr [[TMP0]], i64 [[TMP13]]
+; CHECK-NEXT:    br label %[[FOR_BODY_FOR_END_LOOPEXIT_CRIT_EDGE_I:.*]]
+; CHECK:       [[FOR_BODY_I]]:
+; CHECK-NEXT:    [[__FIRST_SROA_0_012_I:%.*]] = phi ptr [ [[INCDEC_PTR_I_I:%.*]], %[[FOR_INC_I:.*]] ], [ [[__FIRST_SROA_0_012_I_PH]], %[[FOR_BODY_I_PREHEADER6]] ]
+; CHECK-NEXT:    [[TMP15:%.*]] = load i32, ptr [[__FIRST_SROA_0_012_I]], align 4
+; CHECK-NEXT:    [[CMP_I_I:%.*]] = icmp slt i32 [[TMP15]], [[N]]
+; CHECK-NEXT:    br i1 [[CMP_I_I]], label %[[FOR_BODY_FOR_END_LOOPEXIT_CRIT_EDGE_I]], label %[[FOR_INC_I]]
+; CHECK:       [[FOR_BODY_FOR_END_LOOPEXIT_CRIT_EDGE_I]]:
+; CHECK-NEXT:    [[__FIRST_SROA_0_012_I_LCSSA:%.*]] = phi ptr [ [[TMP14]], %[[VECTOR_EARLY_EXIT]] ], [ [[__FIRST_SROA_0_012_I]], %[[FOR_BODY_I]] ]
+; CHECK-NEXT:    [[DOTPRE14_I:%.*]] = ptrtoint ptr [[__FIRST_SROA_0_012_I_LCSSA]] to i64
+; CHECK-NEXT:    br label %[[BR1]]
+; CHECK:       [[FOR_INC_I]]:
+; CHECK-NEXT:    [[INCDEC_PTR_I_I]] = getelementptr inbounds nuw i8, ptr [[__FIRST_SROA_0_012_I]], i64 4
+; CHECK-NEXT:    [[CMP_I_I7_NOT_I:%.*]] = icmp eq ptr [[INCDEC_PTR_I_I]], [[TMP1]]
+; CHECK-NEXT:    br i1 [[CMP_I_I7_NOT_I]], label %[[BR1]], label %[[FOR_BODY_I]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK:       [[BR1]]:
+; CHECK-NEXT:    [[COERCE_VAL_PI_PRE_PHI_I:%.*]] = phi i64 [ [[COERCE_VAL_PI_I_I14]], %[[ENTRY]] ], [ [[DOTPRE14_I]], %[[FOR_BODY_FOR_END_LOOPEXIT_CRIT_EDGE_I]] ], [ [[COERCE_VAL_PI_I_I14]], %[[MIDDLE_BLOCK]] ], [ [[COERCE_VAL_PI_I_I14]], %[[FOR_INC_I]] ]
+; CHECK-NEXT:    ret i64 [[COERCE_VAL_PI_PRE_PHI_I]]
+;
+entry:
+  %0 = load ptr, ptr %v, align 8
+  call void @llvm.assume(i1 true) [ "align"(ptr %0, i64 4) ]
+  %coerce.val.pi.i.i = ptrtoint ptr %0 to i64
+  %__end_.i = getelementptr inbounds nuw i8, ptr %v, i64 8
+  %1 = load ptr, ptr %__end_.i, align 8
+  call void @llvm.assume(i1 true) [ "align"(ptr %1, i64 4) ]
+  %coerce.val.pi.i.i14 = ptrtoint ptr %1 to i64
+  %sub.ptr.sub.i.i.i = sub i64 %coerce.val.pi.i.i14, %coerce.val.pi.i.i
+  call void @llvm.assume(i1 true) [ "dereferenceable"(ptr %0, i64 %sub.ptr.sub.i.i.i) ]
+  call void @llvm.assume(i1 true) [ "align"(ptr %0, i64 4) ]
+  call void @llvm.assume(i1 true) [ "align"(ptr %1, i64 4) ]
+  %cmp.i.i7.not11.i = icmp eq ptr %0, %1
+  br i1 %cmp.i.i7.not11.i, label %br1, label %for.body.i.preheader
+
+for.body.i.preheader:                             ; preds = %entry
+  br label %for.body.i
+
+for.body.i:                                       ; preds = %for.body.i.preheader, %for.inc.i
+  %__first.sroa.0.012.i = phi ptr [ %incdec.ptr.i.i, %for.inc.i ], [ %0, %for.body.i.preheader ]
+  %2 = load i32, ptr %__first.sroa.0.012.i, align 4
+  %cmp.i.i = icmp slt i32 %2, %n
+  br i1 %cmp.i.i, label %for.body.for.end.loopexit_crit_edge.i, label %for.inc.i
+
+for.body.for.end.loopexit_crit_edge.i:            ; preds = %for.body.i
+  %__first.sroa.0.012.i.lcssa = phi ptr [ %__first.sroa.0.012.i, %for.body.i ]
+  %.pre14.i = ptrtoint ptr %__first.sroa.0.012.i.lcssa to i64
+  br label %br1
+
+for.inc.i:                                        ; preds = %for.body.i
+  %incdec.ptr.i.i = getelementptr inbounds nuw i8, ptr %__first.sroa.0.012.i, i64 4
+  %cmp.i.i7.not.i = icmp eq ptr %incdec.ptr.i.i, %1
+  br i1 %cmp.i.i7.not.i, label %br2, label %for.body.i
+
+br2: ; preds = %for.inc.i
+  br label %br1
+
+br1: ; preds = %br2, %entry, %for.body.for.end.loopexit_crit_edge.i
+  %coerce.val.pi.pre-phi.i = phi i64 [ %coerce.val.pi.i.i14, %entry ], [ %.pre14.i, %for.body.for.end.loopexit_crit_edge.i ], [ %coerce.val.pi.i.i14, %br2 ]
+  ret i64 %coerce.val.pi.pre-phi.i
+}
+
+; Function Attrs: mustprogress nocallback nounwind willreturn memory(inaccessiblemem: write)
+declare void @llvm.assume(i1 noundef) #1
+
+attributes #0 = { mustprogress nounwind ssp memory(read, inaccessiblemem: write, target_mem0: none, target_mem1: none) uwtable(sync) "frame-pointer"="non-leaf-no-reserve" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #1 = { mustprogress nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) }
+;.
+; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
+; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
+; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
+; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
+;.

>From e16296d0ccc16f5a9fa23fe2ebf192688eca2196 Mon Sep 17 00:00:00 2001
From: Kshitij Paranjape <kshitijvparanjape at gmail.com>
Date: Fri, 13 Feb 2026 16:22:06 +0530
Subject: [PATCH 2/5] nit

---
 llvm/lib/Analysis/ValueTracking.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index 37f7005fe4c3d..7188df0da9fd3 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -715,9 +715,9 @@ bool llvm::willNotFreeBetween(const Instruction *Assume,
     }
     return true;
   };
-  
-  // Make sure the current function cannot arrange for another thread to free on
-  // its behalf.
+
+  // Helper to make sure the current function cannot arrange for 
+  // another thread to free on its behalf.
   auto hasNoSyncCalls = [](auto Range) {
     for (const auto &[Idx, I] : enumerate(Range)) {
       if (Idx > MaxInstrsToCheckForFree)

>From 3e98c0b0535061482a926e8900e42cf41e1a3e36 Mon Sep 17 00:00:00 2001
From: Kshitij Paranjape <kshitijvparanjape at gmail.com>
Date: Fri, 13 Feb 2026 16:24:40 +0530
Subject: [PATCH 3/5] nit

---
 llvm/lib/Analysis/ValueTracking.cpp | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index 7188df0da9fd3..92f673566edde 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -737,7 +737,8 @@ bool llvm::willNotFreeBetween(const Instruction *Assume,
     if (CtxBB->getSinglePredecessor() != AssumeBB)
       return false;
 
-    if (!hasNoFreeCalls(make_range(CtxBB->begin(), CtxIter)) || !hasNoSyncCalls(make_range(CtxBB->begin(), CtxIter)))
+    if (!hasNoFreeCalls(make_range(CtxBB->begin(), CtxIter)) ||
+        !hasNoSyncCalls(make_range(CtxBB->begin(), CtxIter)))
       return false;
 
     CtxIter = AssumeBB->end();
@@ -749,7 +750,8 @@ bool llvm::willNotFreeBetween(const Instruction *Assume,
 
   // Check if there are any calls between Assume and CtxIter that may free
   // memory.
-  return hasNoFreeCalls(make_range(Assume->getIterator(), CtxIter)) && hasNoSyncCalls(make_range(Assume->getIterator(), CtxIter));
+  return hasNoFreeCalls(make_range(Assume->getIterator(), CtxIter)) &&
+         hasNoSyncCalls(make_range(Assume->getIterator(), CtxIter));
 }
 
 // TODO: cmpExcludesZero misses many cases where `RHS` is non-constant but

>From 77b28d9e6747446525377778f2b1e1ea7ee13c4f Mon Sep 17 00:00:00 2001
From: Kshitij Paranjape <kshitijvparanjape at gmail.com>
Date: Fri, 13 Feb 2026 16:28:36 +0530
Subject: [PATCH 4/5] nitpick

---
 llvm/lib/Analysis/ValueTracking.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index 92f673566edde..b3d159e4ff43e 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -716,7 +716,7 @@ bool llvm::willNotFreeBetween(const Instruction *Assume,
     return true;
   };
 
-  // Helper to make sure the current function cannot arrange for 
+  // Helper to make sure the current function cannot arrange for
   // another thread to free on its behalf.
   auto hasNoSyncCalls = [](auto Range) {
     for (const auto &[Idx, I] : enumerate(Range)) {

>From ff0f1335be44919bfa1681368202f6cc8deb21aa Mon Sep 17 00:00:00 2001
From: Kshitij Paranjape <kshitijvparanjape at gmail.com>
Date: Sat, 14 Feb 2026 01:56:41 +0530
Subject: [PATCH 5/5] Fixed hasNoSync logic and added negative tests

Added checks to the NoSync lambda function to account for
other synchronizing instructions, such as fences and
volatile or atomic memory operations. Also added more
negative tests verifying that loops are not vectorized
when a synchronizing instruction appears between the
assume instruction and the CtxI instruction.
---
 llvm/lib/Analysis/ValueTracking.cpp           |  49 +++-
 .../Transforms/LoopVectorize/issue180180.ll   | 219 ++++++++++++++++++
 2 files changed, 261 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index b3d159e4ff43e..ba08abe4c6748 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -718,13 +718,49 @@ bool llvm::willNotFreeBetween(const Instruction *Assume,
 
   // Helper to make sure the current function cannot arrange for
   // another thread to free on its behalf.
-  auto hasNoSyncCalls = [](auto Range) {
+  auto hasNoSync = [](auto Range) {
     for (const auto &[Idx, I] : enumerate(Range)) {
       if (Idx > MaxInstrsToCheckForFree)
         return false;
-      if (const auto *CB = dyn_cast<CallBase>(&I))
-        if (!CB->hasFnAttr(Attribute::NoSync))
+      if (I.isVolatile()) {
+        return false;
+      }
+
+      auto isOrderedAtomic = [](const Instruction *Inst) {
+        if (!Inst->isAtomic())
           return false;
+
+        if (auto *FI = dyn_cast<FenceInst>(Inst))
+          // All legal orderings for fence are stronger than monotonic.
+          return FI->getSyncScopeID() != SyncScope::SingleThread;
+        else if (isa<AtomicCmpXchgInst>(Inst) || isa<AtomicRMWInst>(Inst))
+          return true;
+        else if (auto *SI = dyn_cast<StoreInst>(Inst))
+          return !SI->isUnordered();
+        else if (auto *LI = dyn_cast<LoadInst>(Inst))
+          return !LI->isUnordered();
+        else {
+          llvm_unreachable("unknown atomic instruction?");
+        }
+      };
+      // An ordered atomic may synchronize.
+      if (isOrderedAtomic(&I)) {
+        return false;
+      }
+
+      auto *CB = dyn_cast<CallBase>(&I);
+      if (!CB)
+        // Non call site cases covered by the two checks above
+        continue;
+
+      if (CB->hasFnAttr(Attribute::NoSync))
+        continue;
+
+      // Non volatile memset/memcpy/memmoves are nosync
+      if (auto *MI = dyn_cast<MemIntrinsic>(&I))
+        if (!MI->isVolatile())
+          continue;
+      return false;
     }
     return true;
   };
@@ -738,8 +774,8 @@ bool llvm::willNotFreeBetween(const Instruction *Assume,
       return false;
 
     if (!hasNoFreeCalls(make_range(CtxBB->begin(), CtxIter)) ||
-        !hasNoSyncCalls(make_range(CtxBB->begin(), CtxIter)))
-      return false;
+        !hasNoSync(make_range(CtxBB->begin(), CtxIter)))
+          return false;
 
     CtxIter = AssumeBB->end();
   } else {
@@ -747,11 +783,10 @@ bool llvm::willNotFreeBetween(const Instruction *Assume,
     if (!Assume->comesBefore(CtxI))
       return false;
   }
-
   // Check if there are any calls between Assume and CtxIter that may free
   // memory.
   return hasNoFreeCalls(make_range(Assume->getIterator(), CtxIter)) &&
-         hasNoSyncCalls(make_range(Assume->getIterator(), CtxIter));
+         hasNoSync(make_range(Assume->getIterator(), CtxIter));
 }
 
 // TODO: cmpExcludesZero misses many cases where `RHS` is non-constant but
diff --git a/llvm/test/Transforms/LoopVectorize/issue180180.ll b/llvm/test/Transforms/LoopVectorize/issue180180.ll
index b17acb67488cd..14352440f44ca 100644
--- a/llvm/test/Transforms/LoopVectorize/issue180180.ll
+++ b/llvm/test/Transforms/LoopVectorize/issue180180.ll
@@ -119,8 +119,227 @@ br1: ; preds = %br2, %entry, %for.body.for.end.loopexit_crit_edge.i
   %coerce.val.pi.pre-phi.i = phi i64 [ %coerce.val.pi.i.i14, %entry ], [ %.pre14.i, %for.body.for.end.loopexit_crit_edge.i ], [ %coerce.val.pi.i.i14, %br2 ]
   ret i64 %coerce.val.pi.pre-phi.i
 }
+; Function Attrs: mustprogress nocallback nounwind willreturn memory(inaccessiblemem: write)
+
+; Function Attrs: mustprogress norecurse nounwind ssp memory(read, inaccessiblemem: write, target_mem0: none, target_mem1: none) uwtable(sync)
+define i64 @volatileVectorizingTest(ptr noundef nonnull readonly align 8 captures(none) dereferenceable(24) %v, i32 noundef %n) local_unnamed_addr #0 {
+; CHECK-LABEL: define i64 @volatileVectorizingTest(
+; CHECK-SAME: ptr noundef nonnull readonly align 8 captures(none) dereferenceable(24) [[V:%.*]], i32 noundef [[N:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[V]], align 8
+; CHECK-NEXT:    call void @llvm.assume(i1 true) [ "align"(ptr [[TMP0]], i64 4) ]
+; CHECK-NEXT:    [[__END__I:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 8
+; CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[__END__I]], align 8
+; CHECK-NEXT:    call void @llvm.assume(i1 true) [ "align"(ptr [[TMP1]], i64 4) ]
+; CHECK-NEXT:    [[VOLATILE:%.*]] = load volatile ptr, ptr [[V]], align 8
+; CHECK-NEXT:    call void @llvm.assume(i1 true) [ "align"(ptr [[TMP0]], i64 4) ]
+; CHECK-NEXT:    call void @llvm.assume(i1 true) [ "align"(ptr [[TMP1]], i64 4) ]
+; CHECK-NEXT:    [[CMP_I_I7_NOT11_I:%.*]] = icmp eq ptr [[TMP0]], [[TMP1]]
+; CHECK-NEXT:    br i1 [[CMP_I_I7_NOT11_I]], label %[[BR1:.*]], label %[[FOR_BODY_I:.*]]
+; CHECK:       [[FOR_BODY_I]]:
+; CHECK-NEXT:    [[__FIRST_SROA_0_012_I:%.*]] = phi ptr [ [[INCDEC_PTR_I_I:%.*]], %[[FOR_INC_I:.*]] ], [ [[TMP0]], %[[ENTRY]] ]
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[__FIRST_SROA_0_012_I]], align 4
+; CHECK-NEXT:    [[CMP_I_I:%.*]] = icmp slt i32 [[TMP2]], [[N]]
+; CHECK-NEXT:    br i1 [[CMP_I_I]], label %[[BR1]], label %[[FOR_INC_I]]
+; CHECK:       [[FOR_INC_I]]:
+; CHECK-NEXT:    [[INCDEC_PTR_I_I]] = getelementptr inbounds nuw i8, ptr [[__FIRST_SROA_0_012_I]], i64 4
+; CHECK-NEXT:    [[CMP_I_I7_NOT_I:%.*]] = icmp eq ptr [[INCDEC_PTR_I_I]], [[TMP1]]
+; CHECK-NEXT:    br i1 [[CMP_I_I7_NOT_I]], label %[[BR1]], label %[[FOR_BODY_I]]
+; CHECK:       [[BR1]]:
+; CHECK-NEXT:    [[COERCE_VAL_PI_PRE_PHI_I_IN:%.*]] = phi ptr [ [[TMP1]], %[[ENTRY]] ], [ [[TMP1]], %[[FOR_INC_I]] ], [ [[__FIRST_SROA_0_012_I]], %[[FOR_BODY_I]] ]
+; CHECK-NEXT:    [[COERCE_VAL_PI_PRE_PHI_I:%.*]] = ptrtoint ptr [[COERCE_VAL_PI_PRE_PHI_I_IN]] to i64
+; CHECK-NEXT:    ret i64 [[COERCE_VAL_PI_PRE_PHI_I]]
+;
+entry:
+  %0 = load ptr, ptr %v, align 8
+  call void @llvm.assume(i1 true) [ "align"(ptr %0, i64 4) ]
+  %coerce.val.pi.i.i = ptrtoint ptr %0 to i64
+  %__end_.i = getelementptr inbounds nuw i8, ptr %v, i64 8
+  %1 = load ptr, ptr %__end_.i, align 8
+  call void @llvm.assume(i1 true) [ "align"(ptr %1, i64 4) ]
+  %coerce.val.pi.i.i14 = ptrtoint ptr %1 to i64
+  %sub.ptr.sub.i.i.i = sub i64 %coerce.val.pi.i.i14, %coerce.val.pi.i.i
+  call void @llvm.assume(i1 true) [ "dereferenceable"(ptr %0, i64 %sub.ptr.sub.i.i.i) ]
+  %volatile = load volatile ptr, ptr %v, align 8                     ; Volatile Instruction
+  call void @llvm.assume(i1 true) [ "align"(ptr %0, i64 4) ]
+  call void @llvm.assume(i1 true) [ "align"(ptr %1, i64 4) ]
+  %cmp.i.i7.not11.i = icmp eq ptr %0, %1
+  br i1 %cmp.i.i7.not11.i, label %br1, label %for.body.i.preheader
+
+for.body.i.preheader:                             ; preds = %entry
+  br label %for.body.i
+
+for.body.i:                                       ; preds = %for.body.i.preheader, %for.inc.i
+  %__first.sroa.0.012.i = phi ptr [ %incdec.ptr.i.i, %for.inc.i ], [ %0, %for.body.i.preheader ]
+  %2 = load i32, ptr %__first.sroa.0.012.i, align 4
+  %cmp.i.i = icmp slt i32 %2, %n
+  br i1 %cmp.i.i, label %for.body.for.end.loopexit_crit_edge.i, label %for.inc.i
+
+for.body.for.end.loopexit_crit_edge.i:            ; preds = %for.body.i
+  %__first.sroa.0.012.i.lcssa = phi ptr [ %__first.sroa.0.012.i, %for.body.i ]
+  %.pre14.i = ptrtoint ptr %__first.sroa.0.012.i.lcssa to i64
+  br label %br1
+
+for.inc.i:                                        ; preds = %for.body.i
+  %incdec.ptr.i.i = getelementptr inbounds nuw i8, ptr %__first.sroa.0.012.i, i64 4
+  %cmp.i.i7.not.i = icmp eq ptr %incdec.ptr.i.i, %1
+  br i1 %cmp.i.i7.not.i, label %br2, label %for.body.i
+
+br2: ; preds = %for.inc.i
+  br label %br1
+
+br1: ; preds = %br2, %entry, %for.body.for.end.loopexit_crit_edge.i
+  %coerce.val.pi.pre-phi.i = phi i64 [ %coerce.val.pi.i.i14, %entry ], [ %.pre14.i, %for.body.for.end.loopexit_crit_edge.i ], [ %coerce.val.pi.i.i14, %br2 ]
+  ret i64 %coerce.val.pi.pre-phi.i
+}
+; Function Attrs: mustprogress nocallback nounwind willreturn memory(inaccessiblemem: write)
+
+; Function Attrs: mustprogress norecurse nounwind ssp memory(read, inaccessiblemem: write, target_mem0: none, target_mem1: none) uwtable(sync)
+define i64 @fenceVectorizingTest(ptr noundef nonnull readonly align 8 captures(none) dereferenceable(24) %v, i32 noundef %n) local_unnamed_addr #0 {
+; CHECK-LABEL: define i64 @fenceVectorizingTest(
+; CHECK-SAME: ptr noundef nonnull readonly align 8 captures(none) dereferenceable(24) [[V:%.*]], i32 noundef [[N:%.*]]) local_unnamed_addr #[[ATTR1]] {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[V]], align 8
+; CHECK-NEXT:    call void @llvm.assume(i1 true) [ "align"(ptr [[TMP0]], i64 4) ]
+; CHECK-NEXT:    [[__END__I:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 8
+; CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[__END__I]], align 8
+; CHECK-NEXT:    call void @llvm.assume(i1 true) [ "align"(ptr [[TMP1]], i64 4) ]
+; CHECK-NEXT:    fence seq_cst
+; CHECK-NEXT:    call void @llvm.assume(i1 true) [ "align"(ptr [[TMP0]], i64 4) ]
+; CHECK-NEXT:    call void @llvm.assume(i1 true) [ "align"(ptr [[TMP1]], i64 4) ]
+; CHECK-NEXT:    [[CMP_I_I7_NOT11_I:%.*]] = icmp eq ptr [[TMP0]], [[TMP1]]
+; CHECK-NEXT:    br i1 [[CMP_I_I7_NOT11_I]], label %[[BR1:.*]], label %[[FOR_BODY_I:.*]]
+; CHECK:       [[FOR_BODY_I]]:
+; CHECK-NEXT:    [[__FIRST_SROA_0_012_I:%.*]] = phi ptr [ [[INCDEC_PTR_I_I:%.*]], %[[FOR_INC_I:.*]] ], [ [[TMP0]], %[[ENTRY]] ]
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[__FIRST_SROA_0_012_I]], align 4
+; CHECK-NEXT:    [[CMP_I_I:%.*]] = icmp slt i32 [[TMP2]], [[N]]
+; CHECK-NEXT:    br i1 [[CMP_I_I]], label %[[BR1]], label %[[FOR_INC_I]]
+; CHECK:       [[FOR_INC_I]]:
+; CHECK-NEXT:    [[INCDEC_PTR_I_I]] = getelementptr inbounds nuw i8, ptr [[__FIRST_SROA_0_012_I]], i64 4
+; CHECK-NEXT:    [[CMP_I_I7_NOT_I:%.*]] = icmp eq ptr [[INCDEC_PTR_I_I]], [[TMP1]]
+; CHECK-NEXT:    br i1 [[CMP_I_I7_NOT_I]], label %[[BR1]], label %[[FOR_BODY_I]]
+; CHECK:       [[BR1]]:
+; CHECK-NEXT:    [[COERCE_VAL_PI_PRE_PHI_I_IN:%.*]] = phi ptr [ [[TMP1]], %[[ENTRY]] ], [ [[TMP1]], %[[FOR_INC_I]] ], [ [[__FIRST_SROA_0_012_I]], %[[FOR_BODY_I]] ]
+; CHECK-NEXT:    [[COERCE_VAL_PI_PRE_PHI_I:%.*]] = ptrtoint ptr [[COERCE_VAL_PI_PRE_PHI_I_IN]] to i64
+; CHECK-NEXT:    ret i64 [[COERCE_VAL_PI_PRE_PHI_I]]
+;
+entry:
+  %0 = load ptr, ptr %v, align 8
+  call void @llvm.assume(i1 true) [ "align"(ptr %0, i64 4) ]
+  %coerce.val.pi.i.i = ptrtoint ptr %0 to i64
+  %__end_.i = getelementptr inbounds nuw i8, ptr %v, i64 8
+  %1 = load ptr, ptr %__end_.i, align 8
+  call void @llvm.assume(i1 true) [ "align"(ptr %1, i64 4) ]
+  %coerce.val.pi.i.i14 = ptrtoint ptr %1 to i64
+  %sub.ptr.sub.i.i.i = sub i64 %coerce.val.pi.i.i14, %coerce.val.pi.i.i
+  call void @llvm.assume(i1 true) [ "dereferenceable"(ptr %0, i64 %sub.ptr.sub.i.i.i) ]
+  fence seq_cst                                                 ; Fence Instruction
+  call void @llvm.assume(i1 true) [ "align"(ptr %0, i64 4) ]
+  call void @llvm.assume(i1 true) [ "align"(ptr %1, i64 4) ]
+  %cmp.i.i7.not11.i = icmp eq ptr %0, %1
+  br i1 %cmp.i.i7.not11.i, label %br1, label %for.body.i.preheader
+
+for.body.i.preheader:                             ; preds = %entry
+  br label %for.body.i
+
+for.body.i:                                       ; preds = %for.body.i.preheader, %for.inc.i
+  %__first.sroa.0.012.i = phi ptr [ %incdec.ptr.i.i, %for.inc.i ], [ %0, %for.body.i.preheader ]
+  %2 = load i32, ptr %__first.sroa.0.012.i, align 4
+  %cmp.i.i = icmp slt i32 %2, %n
+  br i1 %cmp.i.i, label %for.body.for.end.loopexit_crit_edge.i, label %for.inc.i
+
+for.body.for.end.loopexit_crit_edge.i:            ; preds = %for.body.i
+  %__first.sroa.0.012.i.lcssa = phi ptr [ %__first.sroa.0.012.i, %for.body.i ]
+  %.pre14.i = ptrtoint ptr %__first.sroa.0.012.i.lcssa to i64
+  br label %br1
+
+for.inc.i:                                        ; preds = %for.body.i
+  %incdec.ptr.i.i = getelementptr inbounds nuw i8, ptr %__first.sroa.0.012.i, i64 4
+  %cmp.i.i7.not.i = icmp eq ptr %incdec.ptr.i.i, %1
+  br i1 %cmp.i.i7.not.i, label %br2, label %for.body.i
+
+br2: ; preds = %for.inc.i
+  br label %br1
+
+br1: ; preds = %br2, %entry, %for.body.for.end.loopexit_crit_edge.i
+  %coerce.val.pi.pre-phi.i = phi i64 [ %coerce.val.pi.i.i14, %entry ], [ %.pre14.i, %for.body.for.end.loopexit_crit_edge.i ], [ %coerce.val.pi.i.i14, %br2 ]
+  ret i64 %coerce.val.pi.pre-phi.i
+}
+; Function Attrs: mustprogress nocallback nounwind willreturn memory(inaccessiblemem: write)
+
+; Function Attrs: mustprogress norecurse nounwind ssp memory(read, inaccessiblemem: write, target_mem0: none, target_mem1: none) uwtable(sync)
+define i64 @atomicVectorizingTest(ptr noundef nonnull readonly align 8 captures(none) dereferenceable(24) %v, i32 noundef %n) local_unnamed_addr #0 {
+; CHECK-LABEL: define i64 @atomicVectorizingTest(
+; CHECK-SAME: ptr noundef nonnull readonly align 8 captures(none) dereferenceable(24) [[V:%.*]], i32 noundef [[N:%.*]]) local_unnamed_addr #[[ATTR1]] {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[V]], align 8
+; CHECK-NEXT:    call void @llvm.assume(i1 true) [ "align"(ptr [[TMP0]], i64 4) ]
+; CHECK-NEXT:    [[__END__I:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 8
+; CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[__END__I]], align 8
+; CHECK-NEXT:    call void @llvm.assume(i1 true) [ "align"(ptr [[TMP1]], i64 4) ]
+; CHECK-NEXT:    [[ATOMIC:%.*]] = load atomic ptr, ptr [[V]] seq_cst, align 8
+; CHECK-NEXT:    call void @llvm.assume(i1 true) [ "align"(ptr [[TMP0]], i64 4) ]
+; CHECK-NEXT:    call void @llvm.assume(i1 true) [ "align"(ptr [[TMP1]], i64 4) ]
+; CHECK-NEXT:    [[CMP_I_I7_NOT11_I:%.*]] = icmp eq ptr [[TMP0]], [[TMP1]]
+; CHECK-NEXT:    br i1 [[CMP_I_I7_NOT11_I]], label %[[BR1:.*]], label %[[FOR_BODY_I:.*]]
+; CHECK:       [[FOR_BODY_I]]:
+; CHECK-NEXT:    [[__FIRST_SROA_0_012_I:%.*]] = phi ptr [ [[INCDEC_PTR_I_I:%.*]], %[[FOR_INC_I:.*]] ], [ [[TMP0]], %[[ENTRY]] ]
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[__FIRST_SROA_0_012_I]], align 4
+; CHECK-NEXT:    [[CMP_I_I:%.*]] = icmp slt i32 [[TMP2]], [[N]]
+; CHECK-NEXT:    br i1 [[CMP_I_I]], label %[[BR1]], label %[[FOR_INC_I]]
+; CHECK:       [[FOR_INC_I]]:
+; CHECK-NEXT:    [[INCDEC_PTR_I_I]] = getelementptr inbounds nuw i8, ptr [[__FIRST_SROA_0_012_I]], i64 4
+; CHECK-NEXT:    [[CMP_I_I7_NOT_I:%.*]] = icmp eq ptr [[INCDEC_PTR_I_I]], [[TMP1]]
+; CHECK-NEXT:    br i1 [[CMP_I_I7_NOT_I]], label %[[BR1]], label %[[FOR_BODY_I]]
+; CHECK:       [[BR1]]:
+; CHECK-NEXT:    [[COERCE_VAL_PI_PRE_PHI_I_IN:%.*]] = phi ptr [ [[TMP1]], %[[ENTRY]] ], [ [[TMP1]], %[[FOR_INC_I]] ], [ [[__FIRST_SROA_0_012_I]], %[[FOR_BODY_I]] ]
+; CHECK-NEXT:    [[COERCE_VAL_PI_PRE_PHI_I:%.*]] = ptrtoint ptr [[COERCE_VAL_PI_PRE_PHI_I_IN]] to i64
+; CHECK-NEXT:    ret i64 [[COERCE_VAL_PI_PRE_PHI_I]]
+;
+entry:
+  %0 = load ptr, ptr %v, align 8
+  call void @llvm.assume(i1 true) [ "align"(ptr %0, i64 4) ]
+  %coerce.val.pi.i.i = ptrtoint ptr %0 to i64
+  %__end_.i = getelementptr inbounds nuw i8, ptr %v, i64 8
+  %1 = load ptr, ptr %__end_.i, align 8
+  call void @llvm.assume(i1 true) [ "align"(ptr %1, i64 4) ]
+  %coerce.val.pi.i.i14 = ptrtoint ptr %1 to i64
+  %sub.ptr.sub.i.i.i = sub i64 %coerce.val.pi.i.i14, %coerce.val.pi.i.i
+  call void @llvm.assume(i1 true) [ "dereferenceable"(ptr %0, i64 %sub.ptr.sub.i.i.i) ]
+  %atomic = load atomic ptr, ptr %v seq_cst, align 8                ; Atomic Instruction
+  call void @llvm.assume(i1 true) [ "align"(ptr %0, i64 4) ]
+  call void @llvm.assume(i1 true) [ "align"(ptr %1, i64 4) ]
+  %cmp.i.i7.not11.i = icmp eq ptr %0, %1
+  br i1 %cmp.i.i7.not11.i, label %br1, label %for.body.i.preheader
 
+for.body.i.preheader:                             ; preds = %entry
+  br label %for.body.i
+
+for.body.i:                                       ; preds = %for.body.i.preheader, %for.inc.i
+  %__first.sroa.0.012.i = phi ptr [ %incdec.ptr.i.i, %for.inc.i ], [ %0, %for.body.i.preheader ]
+  %2 = load i32, ptr %__first.sroa.0.012.i, align 4
+  %cmp.i.i = icmp slt i32 %2, %n
+  br i1 %cmp.i.i, label %for.body.for.end.loopexit_crit_edge.i, label %for.inc.i
+
+for.body.for.end.loopexit_crit_edge.i:            ; preds = %for.body.i
+  %__first.sroa.0.012.i.lcssa = phi ptr [ %__first.sroa.0.012.i, %for.body.i ]
+  %.pre14.i = ptrtoint ptr %__first.sroa.0.012.i.lcssa to i64
+  br label %br1
+
+for.inc.i:                                        ; preds = %for.body.i
+  %incdec.ptr.i.i = getelementptr inbounds nuw i8, ptr %__first.sroa.0.012.i, i64 4
+  %cmp.i.i7.not.i = icmp eq ptr %incdec.ptr.i.i, %1
+  br i1 %cmp.i.i7.not.i, label %br2, label %for.body.i
+
+br2: ; preds = %for.inc.i
+  br label %br1
+
+br1: ; preds = %br2, %entry, %for.body.for.end.loopexit_crit_edge.i
+  %coerce.val.pi.pre-phi.i = phi i64 [ %coerce.val.pi.i.i14, %entry ], [ %.pre14.i, %for.body.for.end.loopexit_crit_edge.i ], [ %coerce.val.pi.i.i14, %br2 ]
+  ret i64 %coerce.val.pi.pre-phi.i
+}
 ; Function Attrs: mustprogress nocallback nounwind willreturn memory(inaccessiblemem: write)
+
 declare void @llvm.assume(i1 noundef) #1
 
 attributes #0 = { mustprogress nounwind ssp memory(read, inaccessiblemem: write, target_mem0: none, target_mem1: none) uwtable(sync) "frame-pointer"="non-leaf-no-reserve" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }



More information about the llvm-commits mailing list