[llvm] [LoopInfo] Don't recognize loop as parallel if it stores to out-of-loop alloca (PR #180551)
Julius Ikkala via llvm-commits
llvm-commits at lists.llvm.org
Tue Feb 10 08:10:02 PST 2026
https://github.com/juliusikkala updated https://github.com/llvm/llvm-project/pull/180551
>From b626e7e07409d093f9f69aeaa204c529fde11bcc Mon Sep 17 00:00:00 2001
From: Julius Ikkala <julius.ikkala at tuni.fi>
Date: Mon, 9 Feb 2026 17:30:54 +0200
Subject: [PATCH 1/4] [LoopInfo] Don't recognize loop as parallel if it stores
to out-of-loop alloca
---
llvm/lib/Analysis/LoopInfo.cpp | 15 +++++
.../LoopInfo/annotated-parallel-alloca.ll | 57 +++++++++++++++++++
2 files changed, 72 insertions(+)
create mode 100644 llvm/test/Analysis/LoopInfo/annotated-parallel-alloca.ll
diff --git a/llvm/lib/Analysis/LoopInfo.cpp b/llvm/lib/Analysis/LoopInfo.cpp
index a364b21c64b01..d5203a20c8c6c 100644
--- a/llvm/lib/Analysis/LoopInfo.cpp
+++ b/llvm/lib/Analysis/LoopInfo.cpp
@@ -591,6 +591,21 @@ bool Loop::isAnnotatedParallel() const {
if (!I.mayReadOrWriteMemory())
continue;
+ // If the loop contains a store instruction into an alloca that is outside
+ // of the loop, it is possible that the alloca was initially related to a
+ // loop-local variable but got hoisted outside during e.g. inlining or
+ // some other parallel-loop-unaware pass.
+ //
+ // TODO: Allow metadata to mark 'alloca' as safe to vectorize and
+ // separately handle such allocas in the loop vectorizer, either by
+ // sinking the `alloca` into the loop body or by otherwise "privatizing"
+ // the allocation for each vector lane.
+ if (StoreInst *SI = dyn_cast<StoreInst>(&I)) {
+ AllocaInst *AI = findAllocaForValue(SI->getPointerOperand());
+ if (AI && !contains(AI))
+ return false;
+ }
+
if (MDNode *AccessGroup = I.getMetadata(LLVMContext::MD_access_group)) {
auto ContainsAccessGroup = [&ParallelAccessGroups](MDNode *AG) -> bool {
if (AG->getNumOperands() == 0) {
diff --git a/llvm/test/Analysis/LoopInfo/annotated-parallel-alloca.ll b/llvm/test/Analysis/LoopInfo/annotated-parallel-alloca.ll
new file mode 100644
index 0000000000000..b4e5af07950c6
--- /dev/null
+++ b/llvm/test/Analysis/LoopInfo/annotated-parallel-alloca.ll
@@ -0,0 +1,57 @@
+; RUN: opt -passes='print<loops>' -disable-output %s 2>&1 | FileCheck %s
+;
+; void func(long n, long *A) {
+; #pragma clang loop vectorize(assume_safety)
+; for (long i = 0; i < n; i += 1) {
+; long t[32];
+; for (long j = 0; j < 32; j += 1)
+; t[j] = i;
+; A[i] = t[i];
+; }
+; }
+;
+; The alloca for `t` usually gets hoisted outside of the loop (either by Clang
+; itself, or by an inlining pass if the loop body is in a function, etc.) and
+; gets incorrectly shared between iterations. Check that isAnnotatedParallel
+; rejects this kind of usage, since it cannot be vectorized correctly unless
+; mem2reg promotes the array first.
+;
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @func(i64 %n, ptr noalias nonnull %A) {
+entry:
+ %t = alloca [32 x i64], align 16
+ %cmp17 = icmp sgt i64 %n, 0
+ br i1 %cmp17, label %for.body, label %for.cond.cleanup
+
+for.body:
+ %i.018 = phi i64 [ %add8, %for.cond.cleanup3 ], [ 0, %entry ]
+ br label %for.body4
+
+for.body4:
+ %j.016 = phi i64 [ 0, %for.body ], [ %add, %for.body4 ]
+ %arrayidx = getelementptr inbounds nuw i64, ptr %t, i64 %j.016
+ store i64 %i.018, ptr %arrayidx, align 8, !llvm.access.group !9
+ %add = add nuw nsw i64 %j.016, 1
+ %exitcond.not = icmp eq i64 %add, 32
+ br i1 %exitcond.not, label %for.cond.cleanup3, label %for.body4
+
+for.cond.cleanup3:
+ %arrayidx5 = getelementptr inbounds nuw i64, ptr %t, i64 %i.018
+ %0 = load i64, ptr %arrayidx5, align 8, !llvm.access.group !9
+ %arrayidx6 = getelementptr inbounds nuw i64, ptr %A, i64 %i.018
+ store i64 %0, ptr %arrayidx6, align 8, !llvm.access.group !9
+ %add8 = add nuw nsw i64 %i.018, 1
+ %exitcond19.not = icmp eq i64 %add8, %n
+ br i1 %exitcond19.not, label %for.cond.cleanup, label %for.body, !llvm.loop !10
+
+for.cond.cleanup:
+ ret void
+}
+
+!9 = distinct !{}
+!10 = distinct !{!10, !11}
+!11 = !{!"llvm.loop.parallel_accesses", !9}
+
+; CHECK: Loop info for function 'func':
+; CHECK-NOT: Parallel Loop at depth 1 containing:
>From bbf0529abe141d8ebfdbdabbc62587d90761a819 Mon Sep 17 00:00:00 2001
From: Julius Ikkala <julius.ikkala at tuni.fi>
Date: Mon, 9 Feb 2026 18:15:01 +0200
Subject: [PATCH 2/4] Retain parallel metadata on alloca
---
llvm/lib/Transforms/Utils/InlineFunction.cpp | 10 ++++++++--
1 file changed, 8 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp
index 3230b306f17d1..896802b43ef43 100644
--- a/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -946,8 +946,9 @@ static void PropagateCallSiteMetadata(CallBase &CB, Function::iterator FStart,
for (BasicBlock &BB : make_range(FStart, FEnd)) {
for (Instruction &I : BB) {
- // This metadata is only relevant for instructions that access memory.
- if (!I.mayReadOrWriteMemory())
+ // This metadata is only relevant for instructions that access memory, and
+ // for allocas (whose access-group metadata must also be propagated).
+ if (!I.mayReadOrWriteMemory() && !dyn_cast<AllocaInst>(&I))
continue;
if (MemParallelLoopAccess) {
@@ -963,6 +964,11 @@ static void PropagateCallSiteMetadata(CallBase &CB, Function::iterator FStart,
I.setMetadata(LLVMContext::MD_access_group, uniteAccessGroups(
I.getMetadata(LLVMContext::MD_access_group), AccessGroup));
+ // The rest of the metadata is only relevant for instructions accessing
+ // memory.
+ if (!I.mayReadOrWriteMemory())
+ continue;
+
if (AliasScope)
I.setMetadata(LLVMContext::MD_alias_scope, MDNode::concatenate(
I.getMetadata(LLVMContext::MD_alias_scope), AliasScope));
>From 4d7632e1d3dc70285ec2f0d6807bf5953173b3d0 Mon Sep 17 00:00:00 2001
From: Julius Ikkala <julius.ikkala at tuni.fi>
Date: Mon, 9 Feb 2026 18:34:09 +0200
Subject: [PATCH 3/4] Allow alloca if access.group metadata is present
---
llvm/lib/Analysis/LoopInfo.cpp | 49 +++++++++++++++++-----------------
1 file changed, 25 insertions(+), 24 deletions(-)
diff --git a/llvm/lib/Analysis/LoopInfo.cpp b/llvm/lib/Analysis/LoopInfo.cpp
index d5203a20c8c6c..5db4f0771d5bd 100644
--- a/llvm/lib/Analysis/LoopInfo.cpp
+++ b/llvm/lib/Analysis/LoopInfo.cpp
@@ -591,38 +591,39 @@ bool Loop::isAnnotatedParallel() const {
if (!I.mayReadOrWriteMemory())
continue;
+ auto ContainsAccessGroup = [&ParallelAccessGroups](MDNode *AG) -> bool {
+ if (AG->getNumOperands() == 0) {
+ assert(isValidAsAccessGroup(AG) && "Item must be an access group");
+ return ParallelAccessGroups.count(AG);
+ }
+
+ for (const MDOperand &AccessListItem : AG->operands()) {
+ MDNode *AccGroup = cast<MDNode>(AccessListItem.get());
+ assert(isValidAsAccessGroup(AccGroup) &&
+ "List item must be an access group");
+ if (ParallelAccessGroups.count(AccGroup))
+ return true;
+ }
+ return false;
+ };
+
// If the loop contains a store instruction into an alloca that is outside
// of the loop, it is possible that the alloca was initially related to a
// loop-local variable but got hoisted outside during e.g. inlining or
- // some other parallel-loop-unaware pass.
- //
- // TODO: Allow metadata to mark 'alloca' as safe to vectorize and
- // separately handle such allocas in the loop vectorizer, either by
- // sinking the `alloca` into the loop body or by otherwise "privatizing"
- // the allocation for each vector lane.
+ // some other parallel-loop-unaware pass. However, if the alloca itself
+ // has been marked with the access group metadata, this usage has to be
+ // assumed to be valid.
if (StoreInst *SI = dyn_cast<StoreInst>(&I)) {
AllocaInst *AI = findAllocaForValue(SI->getPointerOperand());
- if (AI && !contains(AI))
- return false;
+ if (AI) {
+ MDNode *AccessGroup = AI->getMetadata(LLVMContext::MD_access_group);
+ if (AI && !contains(AI) &&
+ (!AccessGroup || !ContainsAccessGroup(AccessGroup)))
+ return false;
+ }
}
if (MDNode *AccessGroup = I.getMetadata(LLVMContext::MD_access_group)) {
- auto ContainsAccessGroup = [&ParallelAccessGroups](MDNode *AG) -> bool {
- if (AG->getNumOperands() == 0) {
- assert(isValidAsAccessGroup(AG) && "Item must be an access group");
- return ParallelAccessGroups.count(AG);
- }
-
- for (const MDOperand &AccessListItem : AG->operands()) {
- MDNode *AccGroup = cast<MDNode>(AccessListItem.get());
- assert(isValidAsAccessGroup(AccGroup) &&
- "List item must be an access group");
- if (ParallelAccessGroups.count(AccGroup))
- return true;
- }
- return false;
- };
-
if (ContainsAccessGroup(AccessGroup))
continue;
}
>From 4888a4c9660e557d6b797778b667286ca836f75c Mon Sep 17 00:00:00 2001
From: Julius Ikkala <julius.ikkala at tuni.fi>
Date: Tue, 10 Feb 2026 18:09:18 +0200
Subject: [PATCH 4/4] Make LAA only recognize loads&stores to alloca in
parallel loops
---
llvm/include/llvm/Analysis/LoopInfo.h | 4 ++
llvm/lib/Analysis/LoopAccessAnalysis.cpp | 44 ++++++++++---
llvm/lib/Analysis/LoopInfo.cpp | 80 ++++++++++--------------
3 files changed, 75 insertions(+), 53 deletions(-)
diff --git a/llvm/include/llvm/Analysis/LoopInfo.h b/llvm/include/llvm/Analysis/LoopInfo.h
index 0ecb1141dc1be..4f8b31d11b4ca 100644
--- a/llvm/include/llvm/Analysis/LoopInfo.h
+++ b/llvm/include/llvm/Analysis/LoopInfo.h
@@ -341,6 +341,10 @@ class LLVM_ABI Loop : public LoopBase<BasicBlock, Loop> {
/// iterations.
bool isAnnotatedParallel() const;
+ /// Returns true if the loop's parallel_accesses metadata contains the given
+ /// access group.
+ bool containsAccessGroup(MDNode* AG) const;
+
/// Return the llvm.loop loop id metadata node for this loop if it is present.
///
/// If this loop contains the same llvm.loop metadata on each branch to the
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index 2fc724970747c..86966a4e7ecd6 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -2528,6 +2528,12 @@ bool LoopAccessInfo::analyzeLoop(AAResults *AA, const LoopInfo *LI,
const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel();
+ if (IsAnnotatedParallel) {
+ LLVM_DEBUG(
+ dbgs() << "LAA: A loop annotated parallel, ignore memory dependency "
+ << "checks.\n");
+ }
+
const bool EnableMemAccessVersioningOfLoop =
EnableMemAccessVersioning &&
!TheLoop->getHeader()->getParent()->hasOptSize();
@@ -2597,6 +2603,29 @@ bool LoopAccessInfo::analyzeLoop(AAResults *AA, const LoopInfo *LI,
HasComplexMemInst = true;
continue;
}
+
+ // For parallel loops, we only want to analyze alloca-based addresses.
+ // If the loop accesses an alloca that is outside of the loop, it is
+ // possible that the alloca was initially related to a loop-local
+ // variable but got hoisted outside during e.g. inlining or some other
+ // parallel-loop-unaware pass. However, if the alloca itself has been
+ // marked with the access group metadata, this usage has to be assumed
+ // to be valid.
+ if (IsAnnotatedParallel) {
+ AllocaInst *AI = findAllocaForValue(Ld->getPointerOperand());
+ // Not accessing alloca, or the alloca is inside the loop, so no race
+ // condition there.
+ if (!AI || TheLoop->contains(AI))
+ continue;
+
+ MDNode *AG = AI->getMetadata(LLVMContext::MD_access_group);
+ // Access group is annotated properly for this loop, assume no race
+ // condition.
+ if (AG && TheLoop->containsAccessGroup(AG))
+ continue;
+
+ // Otherwise, proceed handling the load as if the loop isn't parallel.
+ }
NumLoads++;
Loads.push_back(Ld);
DepChecker->addAccess(Ld);
@@ -2621,6 +2650,14 @@ bool LoopAccessInfo::analyzeLoop(AAResults *AA, const LoopInfo *LI,
HasComplexMemInst = true;
continue;
}
+ if (IsAnnotatedParallel) {
+ AllocaInst *AI = findAllocaForValue(St->getPointerOperand());
+ if (!AI || TheLoop->contains(AI))
+ continue;
+ MDNode *AG = AI->getMetadata(LLVMContext::MD_access_group);
+ if (AG && TheLoop->containsAccessGroup(AG))
+ continue;
+ }
NumStores++;
Stores.push_back(St);
DepChecker->addAccess(St);
@@ -2689,13 +2726,6 @@ bool LoopAccessInfo::analyzeLoop(AAResults *AA, const LoopInfo *LI,
}
}
- if (IsAnnotatedParallel) {
- LLVM_DEBUG(
- dbgs() << "LAA: A loop annotated parallel, ignore memory dependency "
- << "checks.\n");
- return true;
- }
-
for (LoadInst *LD : Loads) {
Value *Ptr = LD->getPointerOperand();
// If we did *not* see this pointer before, insert it to the
diff --git a/llvm/lib/Analysis/LoopInfo.cpp b/llvm/lib/Analysis/LoopInfo.cpp
index 5db4f0771d5bd..1012001a4cc87 100644
--- a/llvm/lib/Analysis/LoopInfo.cpp
+++ b/llvm/lib/Analysis/LoopInfo.cpp
@@ -568,19 +568,6 @@ bool Loop::isAnnotatedParallel() const {
if (!DesiredLoopIdMetadata)
return false;
- MDNode *ParallelAccesses =
- findOptionMDForLoop(this, "llvm.loop.parallel_accesses");
- SmallPtrSet<MDNode *, 4>
- ParallelAccessGroups; // For scalable 'contains' check.
- if (ParallelAccesses) {
- for (const MDOperand &MD : drop_begin(ParallelAccesses->operands())) {
- MDNode *AccGroup = cast<MDNode>(MD.get());
- assert(isValidAsAccessGroup(AccGroup) &&
- "List item must be an access group");
- ParallelAccessGroups.insert(AccGroup);
- }
- }
-
// The loop branch contains the parallel loop metadata. In order to ensure
// that any parallel-loop-unaware optimization pass hasn't added loop-carried
// dependencies (thus converted the loop back to a sequential loop), check
@@ -591,40 +578,8 @@ bool Loop::isAnnotatedParallel() const {
if (!I.mayReadOrWriteMemory())
continue;
- auto ContainsAccessGroup = [&ParallelAccessGroups](MDNode *AG) -> bool {
- if (AG->getNumOperands() == 0) {
- assert(isValidAsAccessGroup(AG) && "Item must be an access group");
- return ParallelAccessGroups.count(AG);
- }
-
- for (const MDOperand &AccessListItem : AG->operands()) {
- MDNode *AccGroup = cast<MDNode>(AccessListItem.get());
- assert(isValidAsAccessGroup(AccGroup) &&
- "List item must be an access group");
- if (ParallelAccessGroups.count(AccGroup))
- return true;
- }
- return false;
- };
-
- // If the loop contains a store instruction into an alloca that is outside
- // of the loop, it is possible that the alloca was initially related to a
- // loop-local variable but got hoisted outside during e.g. inlining or
- // some other parallel-loop-unaware pass. However, if the alloca itself
- // has been marked with the access group metadata, this usage has to be
- // assumed to be valid.
- if (StoreInst *SI = dyn_cast<StoreInst>(&I)) {
- AllocaInst *AI = findAllocaForValue(SI->getPointerOperand());
- if (AI) {
- MDNode *AccessGroup = AI->getMetadata(LLVMContext::MD_access_group);
- if (AI && !contains(AI) &&
- (!AccessGroup || !ContainsAccessGroup(AccessGroup)))
- return false;
- }
- }
-
if (MDNode *AccessGroup = I.getMetadata(LLVMContext::MD_access_group)) {
- if (ContainsAccessGroup(AccessGroup))
+ if (containsAccessGroup(AccessGroup))
continue;
}
@@ -645,6 +600,39 @@ bool Loop::isAnnotatedParallel() const {
return true;
}
+bool Loop::containsAccessGroup(MDNode *AG) const {
+  // Collect the access groups listed in this loop's
+  // llvm.loop.parallel_accesses metadata once, for fast membership tests.
+  MDNode *ParallelAccesses =
+      findOptionMDForLoop(this, "llvm.loop.parallel_accesses");
+  SmallPtrSet<MDNode *, 4> ParallelAccessGroups;
+  if (ParallelAccesses) {
+    for (const MDOperand &MD : drop_begin(ParallelAccesses->operands())) {
+      MDNode *Group = cast<MDNode>(MD.get());
+      assert(isValidAsAccessGroup(Group) &&
+             "List item must be an access group");
+      ParallelAccessGroups.insert(Group);
+    }
+  }
+
+  // An access-group node with no operands is itself a single access group.
+  if (AG->getNumOperands() == 0) {
+    assert(isValidAsAccessGroup(AG) && "Item must be an access group");
+    return ParallelAccessGroups.count(AG);
+  }
+
+  // Otherwise AG is a list of access groups; it is covered if any of its
+  // members is listed as parallel for this loop.
+  for (const MDOperand &AccessListItem : AG->operands()) {
+    MDNode *AccGroup = cast<MDNode>(AccessListItem.get());
+    assert(isValidAsAccessGroup(AccGroup) &&
+           "List item must be an access group");
+    if (ParallelAccessGroups.count(AccGroup))
+      return true;
+  }
+  return false;
+}
+
DebugLoc Loop::getStartLoc() const { return getLocRange().getStart(); }
Loop::LocRange Loop::getLocRange() const {
More information about the llvm-commits
mailing list