[llvm] [LoopInfo] Don't recognize loop as parallel if it stores to out-of-loop alloca (PR #180551)

Fri Feb 13 07:54:38 PST 2026

https://github.com/juliusikkala updated https://github.com/llvm/llvm-project/pull/180551

>From b626e7e07409d093f9f69aeaa204c529fde11bcc Mon Sep 17 00:00:00 2001
From: Julius Ikkala <julius.ikkala at tuni.fi>
Date: Mon, 9 Feb 2026 17:30:54 +0200
Subject: [PATCH 1/8] [LoopInfo] Don't recognize loop as parallel if it stores
 to out-of-loop alloca

---
 llvm/lib/Analysis/LoopInfo.cpp                | 15 +++++
 .../LoopInfo/annotated-parallel-alloca.ll     | 57 +++++++++++++++++++
 2 files changed, 72 insertions(+)
 create mode 100644 llvm/test/Analysis/LoopInfo/annotated-parallel-alloca.ll

diff --git a/llvm/lib/Analysis/LoopInfo.cpp b/llvm/lib/Analysis/LoopInfo.cpp
index a364b21c64b01..d5203a20c8c6c 100644
--- a/llvm/lib/Analysis/LoopInfo.cpp
+++ b/llvm/lib/Analysis/LoopInfo.cpp
@@ -591,6 +591,21 @@ bool Loop::isAnnotatedParallel() const {
       if (!I.mayReadOrWriteMemory())
         continue;
 
+      // If the loop contains a store instruction into an alloca that is outside
+      // of the loop, it is possible that the alloca was initially related to a
+      // loop-local variable but got hoisted outside during e.g. inlining or
+      // some other parallel-loop-unaware pass.
+      //
+      // TODO: Allow metadata to mark 'alloca' as safe to vectorize and
+      // separately handle such allocas in the loop vectorizer, either by
+      // sinking the `alloca` into the loop body or by otherwise "privatizing"
+      // the allocation for each vector lane.
+      if (StoreInst *SI = dyn_cast<StoreInst>(&I)) {
+        AllocaInst *AI = findAllocaForValue(SI->getPointerOperand());
+        if (AI && !contains(AI))
+          return false;
+      }
+
       if (MDNode *AccessGroup = I.getMetadata(LLVMContext::MD_access_group)) {
         auto ContainsAccessGroup = [&ParallelAccessGroups](MDNode *AG) -> bool {
           if (AG->getNumOperands() == 0) {
diff --git a/llvm/test/Analysis/LoopInfo/annotated-parallel-alloca.ll b/llvm/test/Analysis/LoopInfo/annotated-parallel-alloca.ll
new file mode 100644
index 0000000000000..b4e5af07950c6
--- /dev/null
+++ b/llvm/test/Analysis/LoopInfo/annotated-parallel-alloca.ll
@@ -0,0 +1,57 @@
+; RUN: opt -passes='print<loops>' -disable-output %s 2>&1 | FileCheck %s
+;
+; void func(long n, long *A) {
+;   #pragma clang loop vectorize(assume_safety)
+;   for (long i = 0; i < n; i += 1) {
+;     long t[32];
+;     for (long j = 0; j < 32; j += 1)
+;       t[j] = i;
+;     A[i] = t[i];
+;   }
+; }
+;
+; The alloca for `t` usually gets hoisted outside of the loop (either by Clang
+; itself, or by an inlining pass if the loop body is in a function, etc.) and
+; is then incorrectly shared between iterations. Check that isAnnotatedParallel
+; rejects this kind of usage, as the loop would not be vectorized correctly
+; unless the array is first promoted out of memory (e.g. by SROA/mem2reg).
+;
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @func(i64 %n, ptr noalias nonnull %A) {
+entry:
+  %t = alloca [32 x i64], align 16
+  %cmp17 = icmp sgt i64 %n, 0
+  br i1 %cmp17, label %for.body, label %for.cond.cleanup
+
+for.body:
+  %i.018 = phi i64 [ %add8, %for.cond.cleanup3 ], [ 0, %entry ]
+  br label %for.body4
+
+for.body4:
+  %j.016 = phi i64 [ 0, %for.body ], [ %add, %for.body4 ]
+  %arrayidx = getelementptr inbounds nuw i64, ptr %t, i64 %j.016
+  store i64 %i.018, ptr %arrayidx, align 8, !llvm.access.group !9
+  %add = add nuw nsw i64 %j.016, 1
+  %exitcond.not = icmp eq i64 %add, 32
+  br i1 %exitcond.not, label %for.cond.cleanup3, label %for.body4
+
+for.cond.cleanup3:
+  %arrayidx5 = getelementptr inbounds nuw i64, ptr %t, i64 %i.018
+  %0 = load i64, ptr %arrayidx5, align 8, !llvm.access.group !9
+  %arrayidx6 = getelementptr inbounds nuw i64, ptr %A, i64 %i.018
+  store i64 %0, ptr %arrayidx6, align 8, !llvm.access.group !9
+  %add8 = add nuw nsw i64 %i.018, 1
+  %exitcond19.not = icmp eq i64 %add8, %n
+  br i1 %exitcond19.not, label %for.cond.cleanup, label %for.body, !llvm.loop !10
+
+for.cond.cleanup:
+  ret void
+}
+
+!9 = distinct !{}
+!10 = distinct !{!10, !11}
+!11 = !{!"llvm.loop.parallel_accesses", !9}
+
+; CHECK: Loop info for function 'func':
+; CHECK-NOT: Parallel Loop at depth 1 containing:

>From bbf0529abe141d8ebfdbdabbc62587d90761a819 Mon Sep 17 00:00:00 2001
From: Julius Ikkala <julius.ikkala at tuni.fi>
Date: Mon, 9 Feb 2026 18:15:01 +0200
Subject: [PATCH 2/8] Retain parallel metadata on alloca

---
 llvm/lib/Transforms/Utils/InlineFunction.cpp | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp
index 3230b306f17d1..896802b43ef43 100644
--- a/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -946,8 +946,9 @@ static void PropagateCallSiteMetadata(CallBase &CB, Function::iterator FStart,
 
   for (BasicBlock &BB : make_range(FStart, FEnd)) {
     for (Instruction &I : BB) {
-      // This metadata is only relevant for instructions that access memory.
-      if (!I.mayReadOrWriteMemory())
+      // This metadata is only relevant for instructions that access memory and
+      // alloca.
+      if (!I.mayReadOrWriteMemory() && !dyn_cast<AllocaInst>(&I))
         continue;
 
       if (MemParallelLoopAccess) {
@@ -963,6 +964,11 @@ static void PropagateCallSiteMetadata(CallBase &CB, Function::iterator FStart,
         I.setMetadata(LLVMContext::MD_access_group, uniteAccessGroups(
             I.getMetadata(LLVMContext::MD_access_group), AccessGroup));
 
+      // The rest of the metadata is only relevant for instructions accessing
+      // memory.
+      if (!I.mayReadOrWriteMemory())
+        continue;
+
       if (AliasScope)
         I.setMetadata(LLVMContext::MD_alias_scope, MDNode::concatenate(
             I.getMetadata(LLVMContext::MD_alias_scope), AliasScope));

>From 4d7632e1d3dc70285ec2f0d6807bf5953173b3d0 Mon Sep 17 00:00:00 2001
From: Julius Ikkala <julius.ikkala at tuni.fi>
Date: Mon, 9 Feb 2026 18:34:09 +0200
Subject: [PATCH 3/8] Allow alloca if access.group metadata is present

---
 llvm/lib/Analysis/LoopInfo.cpp | 49 +++++++++++++++++-----------------
 1 file changed, 25 insertions(+), 24 deletions(-)

diff --git a/llvm/lib/Analysis/LoopInfo.cpp b/llvm/lib/Analysis/LoopInfo.cpp
index d5203a20c8c6c..5db4f0771d5bd 100644
--- a/llvm/lib/Analysis/LoopInfo.cpp
+++ b/llvm/lib/Analysis/LoopInfo.cpp
@@ -591,38 +591,39 @@ bool Loop::isAnnotatedParallel() const {
       if (!I.mayReadOrWriteMemory())
         continue;
 
+      auto ContainsAccessGroup = [&ParallelAccessGroups](MDNode *AG) -> bool {
+        if (AG->getNumOperands() == 0) {
+          assert(isValidAsAccessGroup(AG) && "Item must be an access group");
+          return ParallelAccessGroups.count(AG);
+        }
+
+        for (const MDOperand &AccessListItem : AG->operands()) {
+          MDNode *AccGroup = cast<MDNode>(AccessListItem.get());
+          assert(isValidAsAccessGroup(AccGroup) &&
+                 "List item must be an access group");
+          if (ParallelAccessGroups.count(AccGroup))
+            return true;
+        }
+        return false;
+      };
+
       // If the loop contains a store instruction into an alloca that is outside
       // of the loop, it is possible that the alloca was initially related to a
       // loop-local variable but got hoisted outside during e.g. inlining or
-      // some other parallel-loop-unaware pass.
-      //
-      // TODO: Allow metadata to mark 'alloca' as safe to vectorize and
-      // separately handle such allocas in the loop vectorizer, either by
-      // sinking the `alloca` into the loop body or by otherwise "privatizing"
-      // the allocation for each vector lane.
+      // some other parallel-loop-unaware pass. However, if the alloca itself
+      // has been marked with the access group metadata, this usage has to be
+      // assumed to be valid.
       if (StoreInst *SI = dyn_cast<StoreInst>(&I)) {
         AllocaInst *AI = findAllocaForValue(SI->getPointerOperand());
-        if (AI && !contains(AI))
-          return false;
+        if (AI) {
+          MDNode *AccessGroup = AI->getMetadata(LLVMContext::MD_access_group);
+          if (AI && !contains(AI) &&
+                  (!AccessGroup || !ContainsAccessGroup(AccessGroup)))
+            return false;
+        }
       }
 
       if (MDNode *AccessGroup = I.getMetadata(LLVMContext::MD_access_group)) {
-        auto ContainsAccessGroup = [&ParallelAccessGroups](MDNode *AG) -> bool {
-          if (AG->getNumOperands() == 0) {
-            assert(isValidAsAccessGroup(AG) && "Item must be an access group");
-            return ParallelAccessGroups.count(AG);
-          }
-
-          for (const MDOperand &AccessListItem : AG->operands()) {
-            MDNode *AccGroup = cast<MDNode>(AccessListItem.get());
-            assert(isValidAsAccessGroup(AccGroup) &&
-                   "List item must be an access group");
-            if (ParallelAccessGroups.count(AccGroup))
-              return true;
-          }
-          return false;
-        };
-
         if (ContainsAccessGroup(AccessGroup))
           continue;
       }

>From 4888a4c9660e557d6b797778b667286ca836f75c Mon Sep 17 00:00:00 2001
From: Julius Ikkala <julius.ikkala at tuni.fi>
Date: Tue, 10 Feb 2026 18:09:18 +0200
Subject: [PATCH 4/8] Make LAA only recognize loads&stores to alloca in
 parallel loops

---
 llvm/include/llvm/Analysis/LoopInfo.h    |  4 ++
 llvm/lib/Analysis/LoopAccessAnalysis.cpp | 44 ++++++++++---
 llvm/lib/Analysis/LoopInfo.cpp           | 80 ++++++++++--------------
 3 files changed, 75 insertions(+), 53 deletions(-)

diff --git a/llvm/include/llvm/Analysis/LoopInfo.h b/llvm/include/llvm/Analysis/LoopInfo.h
index 0ecb1141dc1be..4f8b31d11b4ca 100644
--- a/llvm/include/llvm/Analysis/LoopInfo.h
+++ b/llvm/include/llvm/Analysis/LoopInfo.h
@@ -341,6 +341,10 @@ class LLVM_ABI Loop : public LoopBase<BasicBlock, Loop> {
   /// iterations.
   bool isAnnotatedParallel() const;
 
+  /// Returns true if the loop's parallel_accesses metadata contains the given
+  /// access group.
+  bool containsAccessGroup(MDNode* AG) const;
+
   /// Return the llvm.loop loop id metadata node for this loop if it is present.
   ///
   /// If this loop contains the same llvm.loop metadata on each branch to the
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index 2fc724970747c..86966a4e7ecd6 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -2528,6 +2528,12 @@ bool LoopAccessInfo::analyzeLoop(AAResults *AA, const LoopInfo *LI,
 
   const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel();
 
+  if (IsAnnotatedParallel) {
+    LLVM_DEBUG(
+        dbgs() << "LAA: A loop annotated parallel, ignore memory dependency "
+               << "checks.\n");
+  }
+
   const bool EnableMemAccessVersioningOfLoop =
       EnableMemAccessVersioning &&
       !TheLoop->getHeader()->getParent()->hasOptSize();
@@ -2597,6 +2603,29 @@ bool LoopAccessInfo::analyzeLoop(AAResults *AA, const LoopInfo *LI,
           HasComplexMemInst = true;
           continue;
         }
+
+        // For parallel loops, we only want to analyze alloca-based addresses.
+        // If the loop accesses an alloca that is outside of the loop, it is
+        // possible that the alloca was initially related to a loop-local
+        // variable but got hoisted outside during e.g. inlining or some other
+        // parallel-loop-unaware pass. However, if the alloca itself has been
+        // marked with the access group metadata, this usage has to be assumed
+        // to be valid.
+        if (IsAnnotatedParallel) {
+          AllocaInst *AI = findAllocaForValue(Ld->getPointerOperand());
+          // Not accessing alloca, or the alloca is inside the loop, so no race
+          // condition there.
+          if (!AI || TheLoop->contains(AI))
+            continue;
+
+          MDNode *AG = AI->getMetadata(LLVMContext::MD_access_group);
+          // Access group is annotated properly for this loop, assume no race
+          // condition.
+          if (AG && TheLoop->containsAccessGroup(AG))
+            continue;
+
+          // Otherwise, proceed handling the load as if the loop isn't parallel.
+        }
         NumLoads++;
         Loads.push_back(Ld);
         DepChecker->addAccess(Ld);
@@ -2621,6 +2650,14 @@ bool LoopAccessInfo::analyzeLoop(AAResults *AA, const LoopInfo *LI,
           HasComplexMemInst = true;
           continue;
         }
+        if (IsAnnotatedParallel) {
+          AllocaInst *AI = findAllocaForValue(St->getPointerOperand());
+          if (!AI || TheLoop->contains(AI))
+            continue;
+          MDNode *AG = AI->getMetadata(LLVMContext::MD_access_group);
+          if (AG && TheLoop->containsAccessGroup(AG))
+            continue;
+        }
         NumStores++;
         Stores.push_back(St);
         DepChecker->addAccess(St);
@@ -2689,13 +2726,6 @@ bool LoopAccessInfo::analyzeLoop(AAResults *AA, const LoopInfo *LI,
     }
   }
 
-  if (IsAnnotatedParallel) {
-    LLVM_DEBUG(
-        dbgs() << "LAA: A loop annotated parallel, ignore memory dependency "
-               << "checks.\n");
-    return true;
-  }
-
   for (LoadInst *LD : Loads) {
     Value *Ptr = LD->getPointerOperand();
     // If we did *not* see this pointer before, insert it to the
diff --git a/llvm/lib/Analysis/LoopInfo.cpp b/llvm/lib/Analysis/LoopInfo.cpp
index 5db4f0771d5bd..1012001a4cc87 100644
--- a/llvm/lib/Analysis/LoopInfo.cpp
+++ b/llvm/lib/Analysis/LoopInfo.cpp
@@ -568,19 +568,6 @@ bool Loop::isAnnotatedParallel() const {
   if (!DesiredLoopIdMetadata)
     return false;
 
-  MDNode *ParallelAccesses =
-      findOptionMDForLoop(this, "llvm.loop.parallel_accesses");
-  SmallPtrSet<MDNode *, 4>
-      ParallelAccessGroups; // For scalable 'contains' check.
-  if (ParallelAccesses) {
-    for (const MDOperand &MD : drop_begin(ParallelAccesses->operands())) {
-      MDNode *AccGroup = cast<MDNode>(MD.get());
-      assert(isValidAsAccessGroup(AccGroup) &&
-             "List item must be an access group");
-      ParallelAccessGroups.insert(AccGroup);
-    }
-  }
-
   // The loop branch contains the parallel loop metadata. In order to ensure
   // that any parallel-loop-unaware optimization pass hasn't added loop-carried
   // dependencies (thus converted the loop back to a sequential loop), check
@@ -591,40 +578,8 @@ bool Loop::isAnnotatedParallel() const {
       if (!I.mayReadOrWriteMemory())
         continue;
 
-      auto ContainsAccessGroup = [&ParallelAccessGroups](MDNode *AG) -> bool {
-        if (AG->getNumOperands() == 0) {
-          assert(isValidAsAccessGroup(AG) && "Item must be an access group");
-          return ParallelAccessGroups.count(AG);
-        }
-
-        for (const MDOperand &AccessListItem : AG->operands()) {
-          MDNode *AccGroup = cast<MDNode>(AccessListItem.get());
-          assert(isValidAsAccessGroup(AccGroup) &&
-                 "List item must be an access group");
-          if (ParallelAccessGroups.count(AccGroup))
-            return true;
-        }
-        return false;
-      };
-
-      // If the loop contains a store instruction into an alloca that is outside
-      // of the loop, it is possible that the alloca was initially related to a
-      // loop-local variable but got hoisted outside during e.g. inlining or
-      // some other parallel-loop-unaware pass. However, if the alloca itself
-      // has been marked with the access group metadata, this usage has to be
-      // assumed to be valid.
-      if (StoreInst *SI = dyn_cast<StoreInst>(&I)) {
-        AllocaInst *AI = findAllocaForValue(SI->getPointerOperand());
-        if (AI) {
-          MDNode *AccessGroup = AI->getMetadata(LLVMContext::MD_access_group);
-          if (AI && !contains(AI) &&
-                  (!AccessGroup || !ContainsAccessGroup(AccessGroup)))
-            return false;
-        }
-      }
-
       if (MDNode *AccessGroup = I.getMetadata(LLVMContext::MD_access_group)) {
-        if (ContainsAccessGroup(AccessGroup))
+        if (containsAccessGroup(AccessGroup))
           continue;
       }
 
@@ -645,6 +600,39 @@ bool Loop::isAnnotatedParallel() const {
   return true;
 }
 
+bool Loop::containsAccessGroup(MDNode* AG) const
+{
+  MDNode *ParallelAccesses =
+      findOptionMDForLoop(this, "llvm.loop.parallel_accesses");
+  auto MetadataContainsGroup = [ParallelAccesses](MDNode *AccGroup) -> bool {
+    if (ParallelAccesses) {
+      for (const MDOperand &MD : drop_begin(ParallelAccesses->operands())) {
+        MDNode *Group = cast<MDNode>(MD.get());
+        assert(isValidAsAccessGroup(Group) &&
+               "List item must be an access group");
+
+        if (AccGroup == Group)
+          return true;
+      }
+    }
+    return false;
+  };
+
+  if (AG->getNumOperands() == 0) {
+    assert(isValidAsAccessGroup(AG) && "Item must be an access group");
+    return MetadataContainsGroup(AG);
+  }
+
+  for (const MDOperand &AccessListItem : AG->operands()) {
+    MDNode *AccGroup = cast<MDNode>(AccessListItem.get());
+    assert(isValidAsAccessGroup(AccGroup) &&
+           "List item must be an access group");
+    if (MetadataContainsGroup(AccGroup))
+      return true;
+  }
+  return false;
+}
+
 DebugLoc Loop::getStartLoc() const { return getLocRange().getStart(); }
 
 Loop::LocRange Loop::getLocRange() const {

>From e9f129915ab546f67daa77702649656d1b2bdf0f Mon Sep 17 00:00:00 2001
From: Julius Ikkala <julius.ikkala at tuni.fi>
Date: Thu, 12 Feb 2026 13:47:58 +0200
Subject: [PATCH 5/8] Revert "Make LAA only recognize loads&stores to alloca in
 parallel loops"

This reverts commit 4888a4c9660e557d6b797778b667286ca836f75c.
---
 llvm/include/llvm/Analysis/LoopInfo.h    |  4 --
 llvm/lib/Analysis/LoopAccessAnalysis.cpp | 44 +++----------
 llvm/lib/Analysis/LoopInfo.cpp           | 80 ++++++++++++++----------
 3 files changed, 53 insertions(+), 75 deletions(-)

diff --git a/llvm/include/llvm/Analysis/LoopInfo.h b/llvm/include/llvm/Analysis/LoopInfo.h
index 4f8b31d11b4ca..0ecb1141dc1be 100644
--- a/llvm/include/llvm/Analysis/LoopInfo.h
+++ b/llvm/include/llvm/Analysis/LoopInfo.h
@@ -341,10 +341,6 @@ class LLVM_ABI Loop : public LoopBase<BasicBlock, Loop> {
   /// iterations.
   bool isAnnotatedParallel() const;
 
-  /// Returns true if the loop's parallel_accesses metadata contains the given
-  /// access group.
-  bool containsAccessGroup(MDNode* AG) const;
-
   /// Return the llvm.loop loop id metadata node for this loop if it is present.
   ///
   /// If this loop contains the same llvm.loop metadata on each branch to the
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index 86966a4e7ecd6..2fc724970747c 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -2528,12 +2528,6 @@ bool LoopAccessInfo::analyzeLoop(AAResults *AA, const LoopInfo *LI,
 
   const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel();
 
-  if (IsAnnotatedParallel) {
-    LLVM_DEBUG(
-        dbgs() << "LAA: A loop annotated parallel, ignore memory dependency "
-               << "checks.\n");
-  }
-
   const bool EnableMemAccessVersioningOfLoop =
       EnableMemAccessVersioning &&
       !TheLoop->getHeader()->getParent()->hasOptSize();
@@ -2603,29 +2597,6 @@ bool LoopAccessInfo::analyzeLoop(AAResults *AA, const LoopInfo *LI,
           HasComplexMemInst = true;
           continue;
         }
-
-        // For parallel loops, we only want to analyze alloca-based addresses.
-        // If the loop accesses an alloca that is outside of the loop, it is
-        // possible that the alloca was initially related to a loop-local
-        // variable but got hoisted outside during e.g. inlining or some other
-        // parallel-loop-unaware pass. However, if the alloca itself has been
-        // marked with the access group metadata, this usage has to be assumed
-        // to be valid.
-        if (IsAnnotatedParallel) {
-          AllocaInst *AI = findAllocaForValue(Ld->getPointerOperand());
-          // Not accessing alloca, or the alloca is inside the loop, so no race
-          // condition there.
-          if (!AI || TheLoop->contains(AI))
-            continue;
-
-          MDNode *AG = AI->getMetadata(LLVMContext::MD_access_group);
-          // Access group is annotated properly for this loop, assume no race
-          // condition.
-          if (AG && TheLoop->containsAccessGroup(AG))
-            continue;
-
-          // Otherwise, proceed handling the load as if the loop isn't parallel.
-        }
         NumLoads++;
         Loads.push_back(Ld);
         DepChecker->addAccess(Ld);
@@ -2650,14 +2621,6 @@ bool LoopAccessInfo::analyzeLoop(AAResults *AA, const LoopInfo *LI,
           HasComplexMemInst = true;
           continue;
         }
-        if (IsAnnotatedParallel) {
-          AllocaInst *AI = findAllocaForValue(St->getPointerOperand());
-          if (!AI || TheLoop->contains(AI))
-            continue;
-          MDNode *AG = AI->getMetadata(LLVMContext::MD_access_group);
-          if (AG && TheLoop->containsAccessGroup(AG))
-            continue;
-        }
         NumStores++;
         Stores.push_back(St);
         DepChecker->addAccess(St);
@@ -2726,6 +2689,13 @@ bool LoopAccessInfo::analyzeLoop(AAResults *AA, const LoopInfo *LI,
     }
   }
 
+  if (IsAnnotatedParallel) {
+    LLVM_DEBUG(
+        dbgs() << "LAA: A loop annotated parallel, ignore memory dependency "
+               << "checks.\n");
+    return true;
+  }
+
   for (LoadInst *LD : Loads) {
     Value *Ptr = LD->getPointerOperand();
     // If we did *not* see this pointer before, insert it to the
diff --git a/llvm/lib/Analysis/LoopInfo.cpp b/llvm/lib/Analysis/LoopInfo.cpp
index 1012001a4cc87..5db4f0771d5bd 100644
--- a/llvm/lib/Analysis/LoopInfo.cpp
+++ b/llvm/lib/Analysis/LoopInfo.cpp
@@ -568,6 +568,19 @@ bool Loop::isAnnotatedParallel() const {
   if (!DesiredLoopIdMetadata)
     return false;
 
+  MDNode *ParallelAccesses =
+      findOptionMDForLoop(this, "llvm.loop.parallel_accesses");
+  SmallPtrSet<MDNode *, 4>
+      ParallelAccessGroups; // For scalable 'contains' check.
+  if (ParallelAccesses) {
+    for (const MDOperand &MD : drop_begin(ParallelAccesses->operands())) {
+      MDNode *AccGroup = cast<MDNode>(MD.get());
+      assert(isValidAsAccessGroup(AccGroup) &&
+             "List item must be an access group");
+      ParallelAccessGroups.insert(AccGroup);
+    }
+  }
+
   // The loop branch contains the parallel loop metadata. In order to ensure
   // that any parallel-loop-unaware optimization pass hasn't added loop-carried
   // dependencies (thus converted the loop back to a sequential loop), check
@@ -578,8 +591,40 @@ bool Loop::isAnnotatedParallel() const {
       if (!I.mayReadOrWriteMemory())
         continue;
 
+      auto ContainsAccessGroup = [&ParallelAccessGroups](MDNode *AG) -> bool {
+        if (AG->getNumOperands() == 0) {
+          assert(isValidAsAccessGroup(AG) && "Item must be an access group");
+          return ParallelAccessGroups.count(AG);
+        }
+
+        for (const MDOperand &AccessListItem : AG->operands()) {
+          MDNode *AccGroup = cast<MDNode>(AccessListItem.get());
+          assert(isValidAsAccessGroup(AccGroup) &&
+                 "List item must be an access group");
+          if (ParallelAccessGroups.count(AccGroup))
+            return true;
+        }
+        return false;
+      };
+
+      // If the loop contains a store instruction into an alloca that is outside
+      // of the loop, it is possible that the alloca was initially related to a
+      // loop-local variable but got hoisted outside during e.g. inlining or
+      // some other parallel-loop-unaware pass. However, if the alloca itself
+      // has been marked with the access group metadata, this usage has to be
+      // assumed to be valid.
+      if (StoreInst *SI = dyn_cast<StoreInst>(&I)) {
+        AllocaInst *AI = findAllocaForValue(SI->getPointerOperand());
+        if (AI) {
+          MDNode *AccessGroup = AI->getMetadata(LLVMContext::MD_access_group);
+          if (AI && !contains(AI) &&
+                  (!AccessGroup || !ContainsAccessGroup(AccessGroup)))
+            return false;
+        }
+      }
+
       if (MDNode *AccessGroup = I.getMetadata(LLVMContext::MD_access_group)) {
-        if (containsAccessGroup(AccessGroup))
+        if (ContainsAccessGroup(AccessGroup))
           continue;
       }
 
@@ -600,39 +645,6 @@ bool Loop::isAnnotatedParallel() const {
   return true;
 }
 
-bool Loop::containsAccessGroup(MDNode* AG) const
-{
-  MDNode *ParallelAccesses =
-      findOptionMDForLoop(this, "llvm.loop.parallel_accesses");
-  auto MetadataContainsGroup = [ParallelAccesses](MDNode *AccGroup) -> bool {
-    if (ParallelAccesses) {
-      for (const MDOperand &MD : drop_begin(ParallelAccesses->operands())) {
-        MDNode *Group = cast<MDNode>(MD.get());
-        assert(isValidAsAccessGroup(Group) &&
-               "List item must be an access group");
-
-        if (AccGroup == Group)
-          return true;
-      }
-    }
-    return false;
-  };
-
-  if (AG->getNumOperands() == 0) {
-    assert(isValidAsAccessGroup(AG) && "Item must be an access group");
-    return MetadataContainsGroup(AG);
-  }
-
-  for (const MDOperand &AccessListItem : AG->operands()) {
-    MDNode *AccGroup = cast<MDNode>(AccessListItem.get());
-    assert(isValidAsAccessGroup(AccGroup) &&
-           "List item must be an access group");
-    if (MetadataContainsGroup(AccGroup))
-      return true;
-  }
-  return false;
-}
-
 DebugLoc Loop::getStartLoc() const { return getLocRange().getStart(); }
 
 Loop::LocRange Loop::getLocRange() const {

>From f8c3857e681fa361eec5e89dacf1ce60c873218c Mon Sep 17 00:00:00 2001
From: Julius Ikkala <julius.ikkala at tuni.fi>
Date: Fri, 13 Feb 2026 16:52:41 +0200
Subject: [PATCH 6/8] Revert inliner marking allocas with llvm.access.group

---
 llvm/lib/Transforms/Utils/InlineFunction.cpp | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp
index 896802b43ef43..3230b306f17d1 100644
--- a/llvm/lib/Transforms/Utils/InlineFunction.cpp
+++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp
@@ -946,9 +946,8 @@ static void PropagateCallSiteMetadata(CallBase &CB, Function::iterator FStart,
 
   for (BasicBlock &BB : make_range(FStart, FEnd)) {
     for (Instruction &I : BB) {
-      // This metadata is only relevant for instructions that access memory and
-      // alloca.
-      if (!I.mayReadOrWriteMemory() && !dyn_cast<AllocaInst>(&I))
+      // This metadata is only relevant for instructions that access memory.
+      if (!I.mayReadOrWriteMemory())
         continue;
 
       if (MemParallelLoopAccess) {
@@ -964,11 +963,6 @@ static void PropagateCallSiteMetadata(CallBase &CB, Function::iterator FStart,
         I.setMetadata(LLVMContext::MD_access_group, uniteAccessGroups(
             I.getMetadata(LLVMContext::MD_access_group), AccessGroup));
 
-      // The rest of the metadata is only relevant for instructions accessing
-      // memory.
-      if (!I.mayReadOrWriteMemory())
-        continue;
-
       if (AliasScope)
         I.setMetadata(LLVMContext::MD_alias_scope, MDNode::concatenate(
             I.getMetadata(LLVMContext::MD_alias_scope), AliasScope));

>From 04e90b2cce9bc0c7f63271f8e225d805636c9d70 Mon Sep 17 00:00:00 2001
From: Julius Ikkala <julius.ikkala at tuni.fi>
Date: Fri, 13 Feb 2026 17:49:30 +0200
Subject: [PATCH 7/8] Update LangRef to add llvm.access.group for alloca

---
 llvm/docs/LangRef.rst | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 28edd439b6900..5e69f92b25c71 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -8176,10 +8176,10 @@ as it is not affected by the ``llvm.loop.disable_nonforced`` metadata.
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 ``llvm.access.group`` metadata can be attached to any instruction that
-potentially accesses memory. It can point to a single distinct metadata
-node, which we call access group. This node represents all memory access
-instructions referring to it via ``llvm.access.group``. When an
-instruction belongs to multiple access groups, it can also point to a
+potentially accesses or allocates memory. It can point to a single distinct
+metadata node, which we call access group. This node represents all memory
+access or allocation instructions referring to it via ``llvm.access.group``.
+When an instruction belongs to multiple access groups, it can also point to a
 list of accesses groups, illustrated by the following example.
 
 .. code-block:: llvm
@@ -8201,8 +8201,8 @@ situation that the content must be updated which, because metadata is
 immutable by design, would required finding and updating all references
 to the access group node.
 
-The access group can be used to refer to a memory access instruction
-without pointing to it directly (which is not possible in global
+The access group can be used to refer to a memory access or allocation
+instruction without pointing to it directly (which is not possible in global
 metadata). Currently, the only metadata making use of it is
 ``llvm.loop.parallel_accesses``.
 
@@ -8223,12 +8223,12 @@ this loop. Instructions that belong to multiple access groups are
 considered having this property if at least one of the access groups
 matches the ``llvm.loop.parallel_accesses`` list.
 
-If all memory-accessing instructions in a loop have
-``llvm.access.group`` metadata that each refer to one of the access
-groups of a loop's ``llvm.loop.parallel_accesses`` metadata, then the
-loop has no loop carried memory dependencies and is considered to be a
-parallel loop. If there is a loop-carried dependency, the behavior is
-undefined.
+If all memory-accessing instructions in a loop, and all ``alloca`` instructions
+whose allocated memory is written to by instructions in the loop, have
+``llvm.access.group`` metadata referring to one of the access groups of a loop's
+``llvm.loop.parallel_accesses`` metadata, then the loop has no loop-carried
+memory dependencies and is considered to be a parallel loop. If there is a
+loop-carried dependency, the behavior is undefined.
 
 Note that if not all memory access instructions belong to an access
 group referred to by ``llvm.loop.parallel_accesses``, then the loop must

>From d95d5108e660748d02f540d40707a87fd1d3dc7b Mon Sep 17 00:00:00 2001
From: Julius Ikkala <julius.ikkala at tuni.fi>
Date: Fri, 13 Feb 2026 17:54:15 +0200
Subject: [PATCH 8/8] Fix formatting

---
 llvm/lib/Analysis/LoopInfo.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Analysis/LoopInfo.cpp b/llvm/lib/Analysis/LoopInfo.cpp
index 5db4f0771d5bd..4bad9381f4b38 100644
--- a/llvm/lib/Analysis/LoopInfo.cpp
+++ b/llvm/lib/Analysis/LoopInfo.cpp
@@ -618,7 +618,7 @@ bool Loop::isAnnotatedParallel() const {
         if (AI) {
           MDNode *AccessGroup = AI->getMetadata(LLVMContext::MD_access_group);
           if (AI && !contains(AI) &&
-                  (!AccessGroup || !ContainsAccessGroup(AccessGroup)))
+              (!AccessGroup || !ContainsAccessGroup(AccessGroup)))
             return false;
         }
       }