[llvm] [ValueTracking] Conservative nosync check prevents vectorization (PR #181345)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Feb 13 02:51:08 PST 2026
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-transforms
@llvm/pr-subscribers-llvm-analysis
Author: Kshitij Paranjape (kshitijvp)
<details>
<summary>Changes</summary>
The conservative nosync check in ValueTracking.cpp returns false,
treating the load as potentially faulting and thereby preventing
vectorization. Instead, check whether any of the instructions between
the assume instruction and the context instruction are synchronizing.
Fixes #<!-- -->180180
---
Full diff: https://github.com/llvm/llvm-project/pull/181345.diff
2 Files Affected:
- (modified) llvm/lib/Analysis/ValueTracking.cpp (+13-5)
- (added) llvm/test/Transforms/LoopVectorize/issue180180.ll (+133)
``````````diff
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index 8761b7bcb51a2..37f7005fe4c3d 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -715,11 +715,19 @@ bool llvm::willNotFreeBetween(const Instruction *Assume,
}
return true;
};
-
+
// Make sure the current function cannot arrange for another thread to free on
// its behalf.
- if (!CtxI->getFunction()->hasNoSync())
- return false;
+ auto hasNoSyncCalls = [](auto Range) {
+ for (const auto &[Idx, I] : enumerate(Range)) {
+ if (Idx > MaxInstrsToCheckForFree)
+ return false;
+ if (const auto *CB = dyn_cast<CallBase>(&I))
+ if (!CB->hasFnAttr(Attribute::NoSync))
+ return false;
+ }
+ return true;
+ };
// Handle cross-block case: CtxI in a successor of Assume's block.
const BasicBlock *CtxBB = CtxI->getParent();
@@ -729,7 +737,7 @@ bool llvm::willNotFreeBetween(const Instruction *Assume,
if (CtxBB->getSinglePredecessor() != AssumeBB)
return false;
- if (!hasNoFreeCalls(make_range(CtxBB->begin(), CtxIter)))
+ if (!hasNoFreeCalls(make_range(CtxBB->begin(), CtxIter)) || !hasNoSyncCalls(make_range(CtxBB->begin(), CtxIter)))
return false;
CtxIter = AssumeBB->end();
@@ -741,7 +749,7 @@ bool llvm::willNotFreeBetween(const Instruction *Assume,
// Check if there are any calls between Assume and CtxIter that may free
// memory.
- return hasNoFreeCalls(make_range(Assume->getIterator(), CtxIter));
+ return hasNoFreeCalls(make_range(Assume->getIterator(), CtxIter)) && hasNoSyncCalls(make_range(Assume->getIterator(), CtxIter));
}
// TODO: cmpExcludesZero misses many cases where `RHS` is non-constant but
diff --git a/llvm/test/Transforms/LoopVectorize/issue180180.ll b/llvm/test/Transforms/LoopVectorize/issue180180.ll
new file mode 100644
index 0000000000000..b17acb67488cd
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/issue180180.ll
@@ -0,0 +1,133 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt %s -O3 -debug-only=loop-vectorize -S | FileCheck %s
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+ at ptr = local_unnamed_addr global ptr null, align 8
+
+; Function Attrs: mustprogress norecurse nounwind ssp memory(read, inaccessiblemem: write, target_mem0: none, target_mem1: none) uwtable(sync)
+define i64 @notVectorizingTest(ptr noundef nonnull readonly align 8 captures(none) dereferenceable(24) %v, i32 noundef %n) local_unnamed_addr #0 {
+; CHECK-LABEL: define i64 @notVectorizingTest(
+; CHECK-SAME: ptr noundef nonnull readonly align 8 captures(none) dereferenceable(24) [[V:%.*]], i32 noundef [[N:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[V]], align 8
+; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP0]], i64 4) ]
+; CHECK-NEXT: [[__END__I:%.*]] = getelementptr inbounds nuw i8, ptr [[V]], i64 8
+; CHECK-NEXT: [[TMP1:%.*]] = load ptr, ptr [[__END__I]], align 8
+; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP1]], i64 4) ]
+; CHECK-NEXT: [[COERCE_VAL_PI_I_I14:%.*]] = ptrtoint ptr [[TMP1]] to i64
+; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP0]], i64 4) ]
+; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(ptr [[TMP1]], i64 4) ]
+; CHECK-NEXT: [[CMP_I_I7_NOT11_I:%.*]] = icmp eq ptr [[TMP0]], [[TMP1]]
+; CHECK-NEXT: br i1 [[CMP_I_I7_NOT11_I]], label %[[BR1:.*]], label %[[FOR_BODY_I_PREHEADER:.*]]
+; CHECK: [[FOR_BODY_I_PREHEADER]]:
+; CHECK-NEXT: [[COERCE_VAL_PI_I_I:%.*]] = ptrtoint ptr [[TMP0]] to i64
+; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[COERCE_VAL_PI_I_I14]], -4
+; CHECK-NEXT: [[TMP3:%.*]] = sub i64 [[TMP2]], [[COERCE_VAL_PI_I_I]]
+; CHECK-NEXT: [[TMP4:%.*]] = lshr exact i64 [[TMP3]], 2
+; CHECK-NEXT: [[TMP5:%.*]] = add nuw nsw i64 [[TMP4]], 1
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 12
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[FOR_BODY_I_PREHEADER6:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP5]], 9223372036854775804
+; CHECK-NEXT: [[TMP6:%.*]] = shl i64 [[N_VEC]], 2
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[TMP0]], i64 [[TMP6]]
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[N]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY_INTERIM:.*]] ]
+; CHECK-NEXT: [[OFFSET_IDX:%.*]] = shl i64 [[INDEX]], 2
+; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[TMP0]], i64 [[OFFSET_IDX]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[NEXT_GEP]], align 4
+; CHECK-NEXT: [[WIDE_LOAD_FR:%.*]] = freeze <4 x i32> [[WIDE_LOAD]]
+; CHECK-NEXT: [[TMP8:%.*]] = icmp slt <4 x i32> [[WIDE_LOAD_FR]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x i1> [[TMP8]] to i4
+; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i4 [[TMP9]], 0
+; CHECK-NEXT: br i1 [[DOTNOT]], label %[[VECTOR_BODY_INTERIM]], label %[[VECTOR_EARLY_EXIT:.*]]
+; CHECK: [[VECTOR_BODY_INTERIM]]:
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP5]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[CMP_N]], label %[[BR1]], label %[[FOR_BODY_I_PREHEADER6]]
+; CHECK: [[FOR_BODY_I_PREHEADER6]]:
+; CHECK-NEXT: [[__FIRST_SROA_0_012_I_PH:%.*]] = phi ptr [ [[TMP0]], %[[FOR_BODY_I_PREHEADER]] ], [ [[TMP7]], %[[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: br label %[[FOR_BODY_I:.*]]
+; CHECK: [[VECTOR_EARLY_EXIT]]:
+; CHECK-NEXT: [[TMP11:%.*]] = tail call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP8]], i1 false)
+; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], [[TMP11]]
+; CHECK-NEXT: [[TMP13:%.*]] = shl i64 [[TMP12]], 2
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[TMP0]], i64 [[TMP13]]
+; CHECK-NEXT: br label %[[FOR_BODY_FOR_END_LOOPEXIT_CRIT_EDGE_I:.*]]
+; CHECK: [[FOR_BODY_I]]:
+; CHECK-NEXT: [[__FIRST_SROA_0_012_I:%.*]] = phi ptr [ [[INCDEC_PTR_I_I:%.*]], %[[FOR_INC_I:.*]] ], [ [[__FIRST_SROA_0_012_I_PH]], %[[FOR_BODY_I_PREHEADER6]] ]
+; CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[__FIRST_SROA_0_012_I]], align 4
+; CHECK-NEXT: [[CMP_I_I:%.*]] = icmp slt i32 [[TMP15]], [[N]]
+; CHECK-NEXT: br i1 [[CMP_I_I]], label %[[FOR_BODY_FOR_END_LOOPEXIT_CRIT_EDGE_I]], label %[[FOR_INC_I]]
+; CHECK: [[FOR_BODY_FOR_END_LOOPEXIT_CRIT_EDGE_I]]:
+; CHECK-NEXT: [[__FIRST_SROA_0_012_I_LCSSA:%.*]] = phi ptr [ [[TMP14]], %[[VECTOR_EARLY_EXIT]] ], [ [[__FIRST_SROA_0_012_I]], %[[FOR_BODY_I]] ]
+; CHECK-NEXT: [[DOTPRE14_I:%.*]] = ptrtoint ptr [[__FIRST_SROA_0_012_I_LCSSA]] to i64
+; CHECK-NEXT: br label %[[BR1]]
+; CHECK: [[FOR_INC_I]]:
+; CHECK-NEXT: [[INCDEC_PTR_I_I]] = getelementptr inbounds nuw i8, ptr [[__FIRST_SROA_0_012_I]], i64 4
+; CHECK-NEXT: [[CMP_I_I7_NOT_I:%.*]] = icmp eq ptr [[INCDEC_PTR_I_I]], [[TMP1]]
+; CHECK-NEXT: br i1 [[CMP_I_I7_NOT_I]], label %[[BR1]], label %[[FOR_BODY_I]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK: [[BR1]]:
+; CHECK-NEXT: [[COERCE_VAL_PI_PRE_PHI_I:%.*]] = phi i64 [ [[COERCE_VAL_PI_I_I14]], %[[ENTRY]] ], [ [[DOTPRE14_I]], %[[FOR_BODY_FOR_END_LOOPEXIT_CRIT_EDGE_I]] ], [ [[COERCE_VAL_PI_I_I14]], %[[MIDDLE_BLOCK]] ], [ [[COERCE_VAL_PI_I_I14]], %[[FOR_INC_I]] ]
+; CHECK-NEXT: ret i64 [[COERCE_VAL_PI_PRE_PHI_I]]
+;
+entry:
+ %0 = load ptr, ptr %v, align 8
+ call void @llvm.assume(i1 true) [ "align"(ptr %0, i64 4) ]
+ %coerce.val.pi.i.i = ptrtoint ptr %0 to i64
+ %__end_.i = getelementptr inbounds nuw i8, ptr %v, i64 8
+ %1 = load ptr, ptr %__end_.i, align 8
+ call void @llvm.assume(i1 true) [ "align"(ptr %1, i64 4) ]
+ %coerce.val.pi.i.i14 = ptrtoint ptr %1 to i64
+ %sub.ptr.sub.i.i.i = sub i64 %coerce.val.pi.i.i14, %coerce.val.pi.i.i
+ call void @llvm.assume(i1 true) [ "dereferenceable"(ptr %0, i64 %sub.ptr.sub.i.i.i) ]
+ call void @llvm.assume(i1 true) [ "align"(ptr %0, i64 4) ]
+ call void @llvm.assume(i1 true) [ "align"(ptr %1, i64 4) ]
+ %cmp.i.i7.not11.i = icmp eq ptr %0, %1
+ br i1 %cmp.i.i7.not11.i, label %br1, label %for.body.i.preheader
+
+for.body.i.preheader: ; preds = %entry
+ br label %for.body.i
+
+for.body.i: ; preds = %for.body.i.preheader, %for.inc.i
+ %__first.sroa.0.012.i = phi ptr [ %incdec.ptr.i.i, %for.inc.i ], [ %0, %for.body.i.preheader ]
+ %2 = load i32, ptr %__first.sroa.0.012.i, align 4
+ %cmp.i.i = icmp slt i32 %2, %n
+ br i1 %cmp.i.i, label %for.body.for.end.loopexit_crit_edge.i, label %for.inc.i
+
+for.body.for.end.loopexit_crit_edge.i: ; preds = %for.body.i
+ %__first.sroa.0.012.i.lcssa = phi ptr [ %__first.sroa.0.012.i, %for.body.i ]
+ %.pre14.i = ptrtoint ptr %__first.sroa.0.012.i.lcssa to i64
+ br label %br1
+
+for.inc.i: ; preds = %for.body.i
+ %incdec.ptr.i.i = getelementptr inbounds nuw i8, ptr %__first.sroa.0.012.i, i64 4
+ %cmp.i.i7.not.i = icmp eq ptr %incdec.ptr.i.i, %1
+ br i1 %cmp.i.i7.not.i, label %br2, label %for.body.i
+
+br2: ; preds = %for.inc.i
+ br label %br1
+
+br1: ; preds = %br2, %entry, %for.body.for.end.loopexit_crit_edge.i
+ %coerce.val.pi.pre-phi.i = phi i64 [ %coerce.val.pi.i.i14, %entry ], [ %.pre14.i, %for.body.for.end.loopexit_crit_edge.i ], [ %coerce.val.pi.i.i14, %br2 ]
+ ret i64 %coerce.val.pi.pre-phi.i
+}
+
+; Function Attrs: mustprogress nocallback nounwind willreturn memory(inaccessiblemem: write)
+declare void @llvm.assume(i1 noundef) #1
+
+attributes #0 = { mustprogress nounwind ssp memory(read, inaccessiblemem: write, target_mem0: none, target_mem1: none) uwtable(sync) "frame-pointer"="non-leaf-no-reserve" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #1 = { mustprogress nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) }
+;.
+; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
+; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
+; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
+; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
+;.
``````````
</details>
https://github.com/llvm/llvm-project/pull/181345
More information about the llvm-commits
mailing list