[llvm] [LoopInfo] Don't recognize loop as parallel if it stores to out-of-loop alloca (PR #180551)
Julius Ikkala via llvm-commits
llvm-commits at lists.llvm.org
Tue Feb 10 08:10:02 PST 2026
https://github.com/juliusikkala updated https://github.com/llvm/llvm-project/pull/180551
>From b626e7e07409d093f9f69aeaa204c529fde11bcc Mon Sep 17 00:00:00 2001
From: Julius Ikkala <julius.ikkala at tuni.fi>
Date: Mon, 9 Feb 2026 17:30:54 +0200
Subject: [PATCH 1/4] [LoopInfo] Don't recognize loop as parallel if it stores
to out-of-loop alloca
---
llvm/lib/Analysis/LoopInfo.cpp | 15 +++++
.../LoopInfo/annotated-parallel-alloca.ll | 57 +++++++++++++++++++
2 files changed, 72 insertions(+)
create mode 100644 llvm/test/Analysis/LoopInfo/annotated-parallel-alloca.ll
diff --git a/llvm/lib/Analysis/LoopInfo.cpp b/llvm/lib/Analysis/LoopInfo.cpp
index a364b21c64b01..d5203a20c8c6c 100644
--- a/llvm/lib/Analysis/LoopInfo.cpp
+++ b/llvm/lib/Analysis/LoopInfo.cpp
@@ -591,6 +591,21 @@ bool Loop::isAnnotatedParallel() const {
if (!I.mayReadOrWriteMemory())
continue;
+ // If the loop contains a store instruction into an alloca that is outside
+ // of the loop, it is possible that the alloca was initially related to a
+ // loop-local variable but got hoisted outside during e.g. inlining or
+ // some other parallel-loop-unaware pass.
+ //
+ // TODO: Allow metadata to mark 'alloca' as safe to vectorize and
+ // separately handle such allocas in the loop vectorizer, either by
+ // sinking the `alloca` into the loop body or by otherwise "privatizing"
+ // the allocation for each vector lane.
+ if (StoreInst *SI = dyn_cast<StoreInst>(&I)) {
+ AllocaInst *AI = findAllocaForValue(SI->getPointerOperand());
+ if (AI && !contains(AI))
+ return false;
+ }
+
if (MDNode *AccessGroup = I.getMetadata(LLVMContext::MD_access_group)) {
auto ContainsAccessGroup = [&ParallelAccessGroups](MDNode *AG) -> bool {
if (AG->getNumOperands() == 0) {
diff --git a/llvm/test/Analysis/LoopInfo/annotated-parallel-alloca.ll b/llvm/test/Analysis/LoopInfo/annotated-parallel-alloca.ll
new file mode 100644
index 0000000000000..b4e5af07950c6
--- /dev/null
+++ b/llvm/test/Analysis/LoopInfo/annotated-parallel-alloca.ll
@@ -0,0 +1,57 @@
+; RUN: opt -passes='print<loops>' -disable-output %s 2>&1 | FileCheck %s
+;
+; void func(long n, long *A) {
+; #pragma clang loop vectorize(assume_safety)
+; for (long i = 0; i < n; i += 1) {
+; long t[32];
+; for (long j = 0; j < 32; j += 1)
+; t[j] = i;
+; A[i] = t[i];
+; }
+; }
+;
+; The alloca for `t` usually gets hoisted outside of the loop (either by Clang
+; itself, or by an inlining pass if the loop body is in a function, etc.) and
+; gets incorrectly shared between iterations. Check that isAnnotatedParallel
+; rejects this kind of usage, since it cannot be vectorized correctly unless
+; mem2reg promotes the array first.
+;
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @func(i64 %n, ptr noalias nonnull %A) {
+entry:
+ %t = alloca [32 x i64], align 16
+ %cmp17 = icmp sgt i64 %n, 0
+ br i1 %cmp17, label %for.body, label %for.cond.cleanup
+
+for.body:
+ %i.018 = phi i64 [ %add8, %for.cond.cleanup3 ], [ 0, %entry ]
+ br label %for.body4
+
+for.body4:
+ %j.016 = phi i64 [ 0, %for.body ], [ %add, %for.body4 ]
+ %arrayidx = getelementptr inbounds nuw i64, ptr %t, i64 %j.016
+ store i64 %i.018, ptr %arrayidx, align 8, !llvm.access.group !9
+ %add = add nuw nsw i64 %j.016, 1
+ %exitcond.not = icmp eq i64 %add, 32
+ br i1 %exitcond.not, label %for.cond.cleanup3, label %for.body4
+
+for.cond.cleanup3:
+ %arrayidx5 = getelementptr inbounds nuw i64, ptr %t, i64 %i.018
+ %0 = load i64, ptr %arrayidx5, align 8, !llvm.access.group !9
+ %arrayidx6 = getelementptr inbounds nuw i64, ptr %A, i64 %i.018
+ store i64 %0, ptr %arrayidx6, align 8, !llvm.access.group !9
+ %add8 = add nuw nsw i64 %i.018, 1
+ %exitcond19.not = icmp eq i64 %add8, %n
+ br i1 %exitcond19.not, label %for.cond.cleanup, label %for.body, !llvm.loop !10
+
+for.cond.cleanup:
+ ret void
+}
+
+!9 = distinct !{}
+!10 = distinct !{!10, !11}
+!11 = !{!"llvm.loop.parallel_accesses", !9}
+
+; CHECK: Loop info for function 'func':
+; CHECK-NOT: Parallel Loop at depth 1 containing:
>From bbf0529abe141d8ebfdbdabbc62587d90761a819 Mon Sep 17 00:00:00 2001
From: Julius Ikkala <julius.ikkala at tuni.fi>
Date: Mon, 9 Feb 2026 18:15:01 +0200
Subject: [PATCH 2/4] Retain parallel metadata on alloca
---
llvm/lib/Transforms/Utils/InlineFunction.cpp | 10 ++++++++--
1 file changed, 8 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp
index 3230b306f17d1..896802b43ef43 100644
--- a/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -946,8 +946,9 @@ static void PropagateCallSiteMetadata(CallBase &CB, Function::iterator FStart,
for (BasicBlock &BB : make_range(FStart, FEnd)) {
for (Instruction &I : BB) {
- // This metadata is only relevant for instructions that access memory.
- if (!I.mayReadOrWriteMemory())
+ // This metadata is only relevant for instructions that access memory, and
+ // for allocas (whose access-group metadata must also be propagated).
+ if (!I.mayReadOrWriteMemory() && !dyn_cast<AllocaInst>(&I))
continue;
if (MemParallelLoopAccess) {
@@ -963,6 +964,11 @@ static void PropagateCallSiteMetadata(CallBase &CB, Function::iterator FStart,
I.setMetadata(LLVMContext::MD_access_group, uniteAccessGroups(
I.getMetadata(LLVMContext::MD_access_group), AccessGroup));
+ // The rest of the metadata is only relevant for instructions accessing
+ // memory.
+ if (!I.mayReadOrWriteMemory())
+ continue;
+
if (AliasScope)
I.setMetadata(LLVMContext::MD_alias_scope, MDNode::concatenate(
I.getMetadata(LLVMContext::MD_alias_scope), AliasScope));
>From 4d7632e1d3dc70285ec2f0d6807bf5953173b3d0 Mon Sep 17 00:00:00 2001
From: Julius Ikkala <julius.ikkala at tuni.fi>
Date: Mon, 9 Feb 2026 18:34:09 +0200
Subject: [PATCH 3/4] Allow alloca if access.group metadata is present
---
llvm/lib/Analysis/LoopInfo.cpp | 49 +++++++++++++++++-----------------
1 file changed, 25 insertions(+), 24 deletions(-)
diff --git a/llvm/lib/Analysis/LoopInfo.cpp b/llvm/lib/Analysis/LoopInfo.cpp
index d5203a20c8c6c..5db4f0771d5bd 100644
--- a/llvm/lib/Analysis/LoopInfo.cpp
+++ b/llvm/lib/Analysis/LoopInfo.cpp
@@ -591,38 +591,39 @@ bool Loop::isAnnotatedParallel() const {
if (!I.mayReadOrWriteMemory())
continue;
+ auto ContainsAccessGroup = [&ParallelAccessGroups](MDNode *AG) -> bool {
+ if (AG->getNumOperands() == 0) {
+ assert(isValidAsAccessGroup(AG) && "Item must be an access group");
+ return ParallelAccessGroups.count(AG);
+ }
+
+ for (const MDOperand &AccessListItem : AG->operands()) {
+ MDNode *AccGroup = cast<MDNode>(AccessListItem.get());
+ assert(isValidAsAccessGroup(AccGroup) &&
+ "List item must be an access group");
+ if (ParallelAccessGroups.count(AccGroup))
+ return true;
+ }
+ return false;
+ };
+
// If the loop contains a store instruction into an alloca that is outside
// of the loop, it is possible that the alloca was initially related to a
// loop-local variable but got hoisted outside during e.g. inlining or
- // some other parallel-loop-unaware pass.
- //
- // TODO: Allow metadata to mark 'alloca' as safe to vectorize and
- // separately handle such allocas in the loop vectorizer, either by
- // sinking the `alloca` into the loop body or by otherwise "privatizing"
- // the allocation for each vector lane.
+ // some other parallel-loop-unaware pass. However, if the alloca itself
+ // has been marked with the access group metadata, this usage has to be
+ // assumed to be valid.
if (StoreInst *SI = dyn_cast<StoreInst>(&I)) {
AllocaInst *AI = findAllocaForValue(SI->getPointerOperand());
- if (AI && !contains(AI))
- return false;
+ if (AI) {
+ MDNode *AccessGroup = AI->getMetadata(LLVMContext::MD_access_group);
+ if (AI && !contains(AI) &&
+ (!AccessGroup || !ContainsAccessGroup(AccessGroup)))
+ return false;
+ }
}
if (MDNode *AccessGroup = I.getMetadata(LLVMContext::MD_access_group)) {
- auto ContainsAccessGroup = [&ParallelAccessGroups](MDNode *AG) -> bool {
- if (AG->getNumOperands() == 0) {
- assert(isValidAsAccessGroup(AG) && "Item must be an access group");
- return ParallelAccessGroups.count(AG);
- }
-
- for (const MDOperand &AccessListItem : AG->operands()) {
- MDNode *AccGroup = cast<MDNode>(AccessListItem.get());
- assert(isValidAsAccessGroup(AccGroup) &&
- "List item must be an access group");
- if (ParallelAccessGroups.count(AccGroup))
- return true;
- }
- return false;
- };
-
if (ContainsAccessGroup(AccessGroup))
continue;
}
>From 4888a4c9660e557d6b797778b667286ca836f75c Mon Sep 17 00:00:00 2001
From: Julius Ikkala <julius.ikkala at tuni.fi>
Date: Tue, 10 Feb 2026 18:09:18 +0200
Subject: [PATCH 4/4] Make LAA only recognize loads&stores to alloca in
parallel loops
---
llvm/include/llvm/Analysis/LoopInfo.h | 4 ++
llvm/lib/Analysis/LoopAccessAnalysis.cpp | 44 ++++++++++---
llvm/lib/Analysis/LoopInfo.cpp | 80 ++++++++++--------------
3 files changed, 75 insertions(+), 53 deletions(-)
diff --git a/llvm/include/llvm/Analysis/LoopInfo.h b/llvm/include/llvm/Analysis/LoopInfo.h
index 0ecb1141dc1be..4f8b31d11b4ca 100644
--- a/llvm/include/llvm/Analysis/LoopInfo.h
+++ b/llvm/include/llvm/Analysis/LoopInfo.h
@@ -341,6 +341,10 @@ class LLVM_ABI Loop : public LoopBase<BasicBlock, Loop> {
/// iterations.
bool isAnnotatedParallel() const;
+ /// Returns true if the loop's parallel_accesses metadata contains the given
+ /// access group.
+ bool containsAccessGroup(MDNode* AG) const;
+
/// Return the llvm.loop loop id metadata node for this loop if it is present.
///
/// If this loop contains the same llvm.loop metadata on each branch to the
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index 2fc724970747c..86966a4e7ecd6 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -2528,6 +2528,12 @@ bool LoopAccessInfo::analyzeLoop(AAResults *AA, const LoopInfo *LI,
const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel();
+ if (IsAnnotatedParallel) {
+ LLVM_DEBUG(
+ dbgs() << "LAA: A loop annotated parallel, ignore memory dependency "
+ << "checks.\n");
+ }
+
const bool EnableMemAccessVersioningOfLoop =
EnableMemAccessVersioning &&
!TheLoop->getHeader()->getParent()->hasOptSize();
@@ -2597,6 +2603,29 @@ bool LoopAccessInfo::analyzeLoop(AAResults *AA, const LoopInfo *LI,
HasComplexMemInst = true;
continue;
}
+
+ // For parallel loops, we only want to analyze alloca-based addresses.
+ // If the loop accesses an alloca that is outside of the loop, it is
+ // possible that the alloca was initially related to a loop-local
+ // variable but got hoisted outside during e.g. inlining or some other
+ // parallel-loop-unaware pass. However, if the alloca itself has been
+ // marked with the access group metadata, this usage has to be assumed
+ // to be valid.
+ if (IsAnnotatedParallel) {
+ AllocaInst *AI = findAllocaForValue(Ld->getPointerOperand());
+ // Not accessing alloca, or the alloca is inside the loop, so no race
+ // condition there.
+ if (!AI || TheLoop->contains(AI))
+ continue;
+
+ MDNode *AG = AI->getMetadata(LLVMContext::MD_access_group);
+ // Access group is annotated properly for this loop, assume no race
+ // condition.
+ if (AG && TheLoop->containsAccessGroup(AG))
+ continue;
+
+ // Otherwise, proceed handling the load as if the loop isn't parallel.
+ }
NumLoads++;
Loads.push_back(Ld);
DepChecker->addAccess(Ld);
@@ -2621,6 +2650,14 @@ bool LoopAccessInfo::analyzeLoop(AAResults *AA, const LoopInfo *LI,
HasComplexMemInst = true;
continue;
}
+ if (IsAnnotatedParallel) {
+ AllocaInst *AI = findAllocaForValue(St->getPointerOperand());
+ if (!AI || TheLoop->contains(AI))
+ continue;
+ MDNode *AG = AI->getMetadata(LLVMContext::MD_access_group);
+ if (AG && TheLoop->containsAccessGroup(AG))
+ continue;
+ }
NumStores++;
Stores.push_back(St);
DepChecker->addAccess(St);
@@ -2689,13 +2726,6 @@ bool LoopAccessInfo::analyzeLoop(AAResults *AA, const LoopInfo *LI,
}
}
- if (IsAnnotatedParallel) {
- LLVM_DEBUG(
- dbgs() << "LAA: A loop annotated parallel, ignore memory dependency "
- << "checks.\n");
- return true;
- }
-
for (LoadInst *LD : Loads) {
Value *Ptr = LD->getPointerOperand();
// If we did *not* see this pointer before, insert it to the
diff --git a/llvm/lib/Analysis/LoopInfo.cpp b/llvm/lib/Analysis/LoopInfo.cpp
index 5db4f0771d5bd..1012001a4cc87 100644
--- a/llvm/lib/Analysis/LoopInfo.cpp
+++ b/llvm/lib/Analysis/LoopInfo.cpp
@@ -568,19 +568,6 @@ bool Loop::isAnnotatedParallel() const {
if (!DesiredLoopIdMetadata)
return false;
- MDNode *ParallelAccesses =
- findOptionMDForLoop(this, "llvm.loop.parallel_accesses");
- SmallPtrSet<MDNode *, 4>
- ParallelAccessGroups; // For scalable 'contains' check.
- if (ParallelAccesses) {
- for (const MDOperand &MD : drop_begin(ParallelAccesses->operands())) {
- MDNode *AccGroup = cast<MDNode>(MD.get());
- assert(isValidAsAccessGroup(AccGroup) &&
- "List item must be an access group");
- ParallelAccessGroups.insert(AccGroup);
- }
- }
-
// The loop branch contains the parallel loop metadata. In order to ensure
// that any parallel-loop-unaware optimization pass hasn't added loop-carried
// dependencies (thus converted the loop back to a sequential loop), check
@@ -591,40 +578,8 @@ bool Loop::isAnnotatedParallel() const {
if (!I.mayReadOrWriteMemory())
continue;
- auto ContainsAccessGroup = [&ParallelAccessGroups](MDNode *AG) -> bool {
- if (AG->getNumOperands() == 0) {
- assert(isValidAsAccessGroup(AG) && "Item must be an access group");
- return ParallelAccessGroups.count(AG);
- }
-
- for (const MDOperand &AccessListItem : AG->operands()) {
- MDNode *AccGroup = cast<MDNode>(AccessListItem.get());
- assert(isValidAsAccessGroup(AccGroup) &&
- "List item must be an access group");
- if (ParallelAccessGroups.count(AccGroup))
- return true;
- }
- return false;
- };
-
- // If the loop contains a store instruction into an alloca that is outside
- // of the loop, it is possible that the alloca was initially related to a
- // loop-local variable but got hoisted outside during e.g. inlining or
- // some other parallel-loop-unaware pass. However, if the alloca itself
- // has been marked with the access group metadata, this usage has to be
- // assumed to be valid.
- if (StoreInst *SI = dyn_cast<StoreInst>(&I)) {
- AllocaInst *AI = findAllocaForValue(SI->getPointerOperand());
- if (AI) {
- MDNode *AccessGroup = AI->getMetadata(LLVMContext::MD_access_group);
- if (AI && !contains(AI) &&
- (!AccessGroup || !ContainsAccessGroup(AccessGroup)))
- return false;
- }
- }
-
if (MDNode *AccessGroup = I.getMetadata(LLVMContext::MD_access_group)) {
- if (ContainsAccessGroup(AccessGroup))
+ if (containsAccessGroup(AccessGroup))
continue;
}
@@ -645,6 +600,39 @@ bool Loop::isAnnotatedParallel() const {
return true;
}
+bool Loop::containsAccessGroup(MDNode *AG) const {
+  // Collect the access groups listed in this loop's
+  // llvm.loop.parallel_accesses metadata once, for fast membership tests.
+  MDNode *ParallelAccesses =
+      findOptionMDForLoop(this, "llvm.loop.parallel_accesses");
+  SmallPtrSet<MDNode *, 4> ParallelAccessGroups;
+  if (ParallelAccesses) {
+    for (const MDOperand &MD : drop_begin(ParallelAccesses->operands())) {
+      MDNode *Group = cast<MDNode>(MD.get());
+      assert(isValidAsAccessGroup(Group) &&
+             "List item must be an access group");
+      ParallelAccessGroups.insert(Group);
+    }
+  }
+
+  // An access-group node with no operands is itself a single access group.
+  if (AG->getNumOperands() == 0) {
+    assert(isValidAsAccessGroup(AG) && "Item must be an access group");
+    return ParallelAccessGroups.count(AG);
+  }
+
+  // Otherwise AG is a list of access groups; it is covered if any of its
+  // members is listed as parallel for this loop.
+  for (const MDOperand &AccessListItem : AG->operands()) {
+    MDNode *AccGroup = cast<MDNode>(AccessListItem.get());
+    assert(isValidAsAccessGroup(AccGroup) &&
+           "List item must be an access group");
+    if (ParallelAccessGroups.count(AccGroup))
+      return true;
+  }
+  return false;
+}
+
DebugLoc Loop::getStartLoc() const { return getLocRange().getStart(); }
Loop::LocRange Loop::getLocRange() const {
More information about the llvm-commits
mailing list