[llvm] [ValueTracking] Conservative nosync check prevents vectorization (PR #181345)

via llvm-commits llvm-commits at lists.llvm.org
Fri Feb 13 02:51:08 PST 2026


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-transforms

@llvm/pr-subscribers-llvm-analysis

Author: Kshitij Paranjape (kshitijvp)

<details>
<summary>Changes</summary>

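The function-level nosync check in willNotFreeBetween (ValueTracking.cpp)
is overly conservative: it returns false whenever the enclosing function is
not marked nosync, so the load is treated as potentially faulting, which
prevents vectorization. Instead, check whether any instruction between the
assume instruction and the context instruction (CtxI) may synchronize.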

Fixes #<!-- -->180180 

---
Full diff: https://github.com/llvm/llvm-project/pull/181345.diff


2 Files Affected:

- (modified) llvm/lib/Analysis/ValueTracking.cpp (+13-5) 
- (added) llvm/test/Transforms/LoopVectorize/issue180180.ll (+133) 


``````````diff
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index 8761b7bcb51a2..37f7005fe4c3d 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -715,11 +715,19 @@ bool llvm::willNotFreeBetween(const Instruction *Assume,
     }
     return true;
   };
-
+  
   // Make sure the current function cannot arrange for another thread to free on
   // its behalf.
-  if (!CtxI->getFunction()->hasNoSync())
-    return false;
+  auto hasNoSyncCalls = [](auto Range) {
+    for (const auto &[Idx, I] : enumerate(Range)) {
+      if (Idx > MaxInstrsToCheckForFree)
+        return false;
+      if (const auto *CB = dyn_cast<CallBase>(&I))
+        if (!CB->hasFnAttr(Attribute::NoSync))
+          return false;
+    }
+    return true;
+  };
 
   // Handle cross-block case: CtxI in a successor of Assume's block.
   const BasicBlock *CtxBB = CtxI->getParent();
@@ -729,7 +737,7 @@ bool llvm::willNotFreeBetween(const Instruction *Assume,
     if (CtxBB->getSinglePredecessor() != AssumeBB)
       return false;
 
-    if (!hasNoFreeCalls(make_range(CtxBB->begin(), CtxIter)))
+    if (!hasNoFreeCalls(make_range(CtxBB->begin(), CtxIter)) || !hasNoSyncCalls(make_range(CtxBB->begin(), CtxIter)))
       return false;
 
     CtxIter = AssumeBB->end();
@@ -741,7 +749,7 @@ bool llvm::willNotFreeBetween(const Instruction *Assume,
 
   // Check if there are any calls between Assume and CtxIter that may free
   // memory.
-  return hasNoFreeCalls(make_range(Assume->getIterator(), CtxIter));
+  return hasNoFreeCalls(make_range(Assume->getIterator(), CtxIter)) && hasNoSyncCalls(make_range(Assume->getIterator(), CtxIter));
 }
 
 // TODO: cmpExcludesZero misses many cases where `RHS` is non-constant but
diff --git a/llvm/test/Transforms/LoopVectorize/issue180180.ll b/llvm/test/Transforms/LoopVectorize/issue180180.ll
new file mode 100644
index 0000000000000..b17acb67488cd
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/issue180180.ll
@@ -0,0 +1,133 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt %s -O3 -debug-only=loop-vectorize -S | FileCheck %s
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@ptr = local_unnamed_addr global ptr null, align 8
+
+; Function Attrs: mustprogress norecurse nounwind ssp memory(read, inaccessiblemem: write, target_mem0: none, target_mem1: none) uwtable(sync)
+define i64 @notVectorizingTest(ptr noundef nonnull readonly align 8 captures(none) dereferenceable(24) %v, i32 noundef %n) local_unnamed_addr #0 {
+; CHECK-LABEL: define i64 @notVectorizingTest(
+; CHECK-SAME: ptr noundef nonnull readonly align 8 captures(none) dereferenceable(24) [[V:%.*]], i32 noundef [[N:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[V]], align 8
+; CHECK-NEXT:    call void @llvm.assume(i1 true) [ "align"(ptr [[TMP0]], i64 4) ]
+; CHECK-NEXT:    [[__END__I:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 8
+; CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[__END__I]], align 8
+; CHECK-NEXT:    call void @llvm.assume(i1 true) [ "align"(ptr [[TMP1]], i64 4) ]
+; CHECK-NEXT:    [[COERCE_VAL_PI_I_I14:%.*]] = ptrtoint ptr [[TMP1]] to i64
+; CHECK-NEXT:    call void @llvm.assume(i1 true) [ "align"(ptr [[TMP0]], i64 4) ]
+; CHECK-NEXT:    call void @llvm.assume(i1 true) [ "align"(ptr [[TMP1]], i64 4) ]
+; CHECK-NEXT:    [[CMP_I_I7_NOT11_I:%.*]] = icmp eq ptr [[TMP0]], [[TMP1]]
+; CHECK-NEXT:    br i1 [[CMP_I_I7_NOT11_I]], label %[[BR1:.*]], label %[[FOR_BODY_I_PREHEADER:.*]]
+; CHECK:       [[FOR_BODY_I_PREHEADER]]:
+; CHECK-NEXT:    [[COERCE_VAL_PI_I_I:%.*]] = ptrtoint ptr [[TMP0]] to i64
+; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[COERCE_VAL_PI_I_I14]], -4
+; CHECK-NEXT:    [[TMP3:%.*]] = sub i64 [[TMP2]], [[COERCE_VAL_PI_I_I]]
+; CHECK-NEXT:    [[TMP4:%.*]] = lshr exact i64 [[TMP3]], 2
+; CHECK-NEXT:    [[TMP5:%.*]] = add nuw nsw i64 [[TMP4]], 1
+; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 12
+; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %[[FOR_BODY_I_PREHEADER6:.*]], label %[[VECTOR_PH:.*]]
+; CHECK:       [[VECTOR_PH]]:
+; CHECK-NEXT:    [[N_VEC:%.*]] = and i64 [[TMP5]], 9223372036854775804
+; CHECK-NEXT:    [[TMP6:%.*]] = shl i64 [[N_VEC]], 2
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr i8, ptr [[TMP0]], i64 [[TMP6]]
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[N]], i64 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY_INTERIM:.*]] ]
+; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = shl i64 [[INDEX]], 2
+; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[TMP0]], i64 [[OFFSET_IDX]]
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[NEXT_GEP]], align 4
+; CHECK-NEXT:    [[WIDE_LOAD_FR:%.*]] = freeze <4 x i32> [[WIDE_LOAD]]
+; CHECK-NEXT:    [[TMP8:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD_FR]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT:    [[TMP9:%.*]] = bitcast <4 x i1> [[TMP8]] to i4
+; CHECK-NEXT:    [[DOTNOT:%.*]] = icmp eq i4 [[TMP9]], 0
+; CHECK-NEXT:    br i1 [[DOTNOT]], label %[[VECTOR_BODY_INTERIM]], label %[[VECTOR_EARLY_EXIT:.*]]
+; CHECK:       [[VECTOR_BODY_INTERIM]]:
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK:       [[MIDDLE_BLOCK]]:
+; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP5]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[CMP_N]], label %[[BR1]], label %[[FOR_BODY_I_PREHEADER6]]
+; CHECK:       [[FOR_BODY_I_PREHEADER6]]:
+; CHECK-NEXT:    [[__FIRST_SROA_0_012_I_PH:%.*]] = phi ptr [ [[TMP0]], %[[FOR_BODY_I_PREHEADER]] ], [ [[TMP7]], %[[MIDDLE_BLOCK]] ]
+; CHECK-NEXT:    br label %[[FOR_BODY_I:.*]]
+; CHECK:       [[VECTOR_EARLY_EXIT]]:
+; CHECK-NEXT:    [[TMP11:%.*]] = tail call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP8]], i1 false)
+; CHECK-NEXT:    [[TMP12:%.*]] = add i64 [[INDEX]], [[TMP11]]
+; CHECK-NEXT:    [[TMP13:%.*]] = shl i64 [[TMP12]], 2
+; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr i8, ptr [[TMP0]], i64 [[TMP13]]
+; CHECK-NEXT:    br label %[[FOR_BODY_FOR_END_LOOPEXIT_CRIT_EDGE_I:.*]]
+; CHECK:       [[FOR_BODY_I]]:
+; CHECK-NEXT:    [[__FIRST_SROA_0_012_I:%.*]] = phi ptr [ [[INCDEC_PTR_I_I:%.*]], %[[FOR_INC_I:.*]] ], [ [[__FIRST_SROA_0_012_I_PH]], %[[FOR_BODY_I_PREHEADER6]] ]
+; CHECK-NEXT:    [[TMP15:%.*]] = load i32, ptr [[__FIRST_SROA_0_012_I]], align 4
+; CHECK-NEXT:    [[CMP_I_I:%.*]] = icmp slt i32 [[TMP15]], [[N]]
+; CHECK-NEXT:    br i1 [[CMP_I_I]], label %[[FOR_BODY_FOR_END_LOOPEXIT_CRIT_EDGE_I]], label %[[FOR_INC_I]]
+; CHECK:       [[FOR_BODY_FOR_END_LOOPEXIT_CRIT_EDGE_I]]:
+; CHECK-NEXT:    [[__FIRST_SROA_0_012_I_LCSSA:%.*]] = phi ptr [ [[TMP14]], %[[VECTOR_EARLY_EXIT]] ], [ [[__FIRST_SROA_0_012_I]], %[[FOR_BODY_I]] ]
+; CHECK-NEXT:    [[DOTPRE14_I:%.*]] = ptrtoint ptr [[__FIRST_SROA_0_012_I_LCSSA]] to i64
+; CHECK-NEXT:    br label %[[BR1]]
+; CHECK:       [[FOR_INC_I]]:
+; CHECK-NEXT:    [[INCDEC_PTR_I_I]] = getelementptr inbounds nuw i8, ptr [[__FIRST_SROA_0_012_I]], i64 4
+; CHECK-NEXT:    [[CMP_I_I7_NOT_I:%.*]] = icmp eq ptr [[INCDEC_PTR_I_I]], [[TMP1]]
+; CHECK-NEXT:    br i1 [[CMP_I_I7_NOT_I]], label %[[BR1]], label %[[FOR_BODY_I]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK:       [[BR1]]:
+; CHECK-NEXT:    [[COERCE_VAL_PI_PRE_PHI_I:%.*]] = phi i64 [ [[COERCE_VAL_PI_I_I14]], %[[ENTRY]] ], [ [[DOTPRE14_I]], %[[FOR_BODY_FOR_END_LOOPEXIT_CRIT_EDGE_I]] ], [ [[COERCE_VAL_PI_I_I14]], %[[MIDDLE_BLOCK]] ], [ [[COERCE_VAL_PI_I_I14]], %[[FOR_INC_I]] ]
+; CHECK-NEXT:    ret i64 [[COERCE_VAL_PI_PRE_PHI_I]]
+;
+entry:
+  %0 = load ptr, ptr %v, align 8
+  call void @llvm.assume(i1 true) [ "align"(ptr %0, i64 4) ]
+  %coerce.val.pi.i.i = ptrtoint ptr %0 to i64
+  %__end_.i = getelementptr inbounds nuw i8, ptr %v, i64 8
+  %1 = load ptr, ptr %__end_.i, align 8
+  call void @llvm.assume(i1 true) [ "align"(ptr %1, i64 4) ]
+  %coerce.val.pi.i.i14 = ptrtoint ptr %1 to i64
+  %sub.ptr.sub.i.i.i = sub i64 %coerce.val.pi.i.i14, %coerce.val.pi.i.i
+  call void @llvm.assume(i1 true) [ "dereferenceable"(ptr %0, i64 %sub.ptr.sub.i.i.i) ]
+  call void @llvm.assume(i1 true) [ "align"(ptr %0, i64 4) ]
+  call void @llvm.assume(i1 true) [ "align"(ptr %1, i64 4) ]
+  %cmp.i.i7.not11.i = icmp eq ptr %0, %1
+  br i1 %cmp.i.i7.not11.i, label %br1, label %for.body.i.preheader
+
+for.body.i.preheader:                             ; preds = %entry
+  br label %for.body.i
+
+for.body.i:                                       ; preds = %for.body.i.preheader, %for.inc.i
+  %__first.sroa.0.012.i = phi ptr [ %incdec.ptr.i.i, %for.inc.i ], [ %0, %for.body.i.preheader ]
+  %2 = load i32, ptr %__first.sroa.0.012.i, align 4
+  %cmp.i.i = icmp slt i32 %2, %n
+  br i1 %cmp.i.i, label %for.body.for.end.loopexit_crit_edge.i, label %for.inc.i
+
+for.body.for.end.loopexit_crit_edge.i:            ; preds = %for.body.i
+  %__first.sroa.0.012.i.lcssa = phi ptr [ %__first.sroa.0.012.i, %for.body.i ]
+  %.pre14.i = ptrtoint ptr %__first.sroa.0.012.i.lcssa to i64
+  br label %br1
+
+for.inc.i:                                        ; preds = %for.body.i
+  %incdec.ptr.i.i = getelementptr inbounds nuw i8, ptr %__first.sroa.0.012.i, i64 4
+  %cmp.i.i7.not.i = icmp eq ptr %incdec.ptr.i.i, %1
+  br i1 %cmp.i.i7.not.i, label %br2, label %for.body.i
+
+br2: ; preds = %for.inc.i
+  br label %br1
+
+br1: ; preds = %br2, %entry, %for.body.for.end.loopexit_crit_edge.i
+  %coerce.val.pi.pre-phi.i = phi i64 [ %coerce.val.pi.i.i14, %entry ], [ %.pre14.i, %for.body.for.end.loopexit_crit_edge.i ], [ %coerce.val.pi.i.i14, %br2 ]
+  ret i64 %coerce.val.pi.pre-phi.i
+}
+
+; Function Attrs: mustprogress nocallback nounwind willreturn memory(inaccessiblemem: write)
+declare void @llvm.assume(i1 noundef) #1
+
+attributes #0 = { mustprogress nounwind ssp memory(read, inaccessiblemem: write, target_mem0: none, target_mem1: none) uwtable(sync) "frame-pointer"="non-leaf-no-reserve" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #1 = { mustprogress nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) }
+;.
+; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
+; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
+; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
+; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
+;.

``````````

</details>


https://github.com/llvm/llvm-project/pull/181345


More information about the llvm-commits mailing list