[llvm] [LoopInfo] Don't recognize loop as parallel if it stores to out-of-loop alloca (PR #180551)

Julius Ikkala via llvm-commits llvm-commits at lists.llvm.org
Mon Feb 9 07:51:59 PST 2026


https://github.com/juliusikkala created https://github.com/llvm/llvm-project/pull/180551

Fixes #179272. Related discussion on Discourse: https://discourse.llvm.org/t/semantics-of-llvm-loop-parallel-accesses-and-interaction-with-alloca/89714

This is just the first step that fixes the miscompile by limiting the application of `isAnnotatedParallel()`. Ideally, we'd be able to allow this metadata on loops that use `alloca` as well, but that will need some special handling in the loop vectorizer to be able to do so safely.

>From b626e7e07409d093f9f69aeaa204c529fde11bcc Mon Sep 17 00:00:00 2001
From: Julius Ikkala <julius.ikkala at tuni.fi>
Date: Mon, 9 Feb 2026 17:30:54 +0200
Subject: [PATCH] [LoopInfo] Don't recognize loop as parallel if it stores to
 out-of-loop alloca

---
 llvm/lib/Analysis/LoopInfo.cpp                | 15 +++++
 .../LoopInfo/annotated-parallel-alloca.ll     | 57 +++++++++++++++++++
 2 files changed, 72 insertions(+)
 create mode 100644 llvm/test/Analysis/LoopInfo/annotated-parallel-alloca.ll

diff --git a/llvm/lib/Analysis/LoopInfo.cpp b/llvm/lib/Analysis/LoopInfo.cpp
index a364b21c64b01..d5203a20c8c6c 100644
--- a/llvm/lib/Analysis/LoopInfo.cpp
+++ b/llvm/lib/Analysis/LoopInfo.cpp
@@ -591,6 +591,21 @@ bool Loop::isAnnotatedParallel() const {
       if (!I.mayReadOrWriteMemory())
         continue;
 
+      // If the loop contains a store instruction into an alloca that is outside
+      // of the loop, it is possible that the alloca was initially related to a
+      // loop-local variable but got hoisted outside during e.g. inlining or
+      // some other parallel-loop-unaware pass.
+      //
+      // TODO: Allow metadata to mark 'alloca' as safe to vectorize and
+      // separately handle such allocas in the loop vectorizer, either by
+      // sinking the `alloca` into the loop body or by otherwise "privatizing"
+      // the allocation for each vector lane.
+      if (StoreInst *SI = dyn_cast<StoreInst>(&I)) {
+        AllocaInst *AI = findAllocaForValue(SI->getPointerOperand());
+        if (AI && !contains(AI))
+          return false;
+      }
+
       if (MDNode *AccessGroup = I.getMetadata(LLVMContext::MD_access_group)) {
         auto ContainsAccessGroup = [&ParallelAccessGroups](MDNode *AG) -> bool {
           if (AG->getNumOperands() == 0) {
diff --git a/llvm/test/Analysis/LoopInfo/annotated-parallel-alloca.ll b/llvm/test/Analysis/LoopInfo/annotated-parallel-alloca.ll
new file mode 100644
index 0000000000000..b4e5af07950c6
--- /dev/null
+++ b/llvm/test/Analysis/LoopInfo/annotated-parallel-alloca.ll
@@ -0,0 +1,57 @@
+; RUN: opt -passes='print<loops>' -disable-output %s 2>&1 | FileCheck %s
+;
+; void func(long n, long *A) {
+;   #pragma clang loop vectorize(assume_safety)
+;   for (long i = 0; i < n; i += 1) {
+;     long t[32];
+;     for (long j = 0; j < 32; j += 1)
+;       t[j] = i;
+;     A[i] = t[i];
+;   }
+; }
+;
+; The alloca for `t` usually gets hoisted outside of the loop (either by Clang
+; itself, or by an inlining pass if the loop body is in a function, etc.) and
+; gets incorrectly shared between iterations. Check that isAnnotatedParallel
+; rejects this kind of usage, as it will not get vectorized correctly unless
+; mem2reg converts the array.
+;
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @func(i64 %n, ptr noalias nonnull %A) {
+entry:
+  %t = alloca [32 x i64], align 16
+  %cmp17 = icmp sgt i64 %n, 0
+  br i1 %cmp17, label %for.body, label %for.cond.cleanup
+
+for.body:
+  %i.018 = phi i64 [ %add8, %for.cond.cleanup3 ], [ 0, %entry ]
+  br label %for.body4
+
+for.body4:
+  %j.016 = phi i64 [ 0, %for.body ], [ %add, %for.body4 ]
+  %arrayidx = getelementptr inbounds nuw i64, ptr %t, i64 %j.016
+  store i64 %i.018, ptr %arrayidx, align 8, !llvm.access.group !9
+  %add = add nuw nsw i64 %j.016, 1
+  %exitcond.not = icmp eq i64 %add, 32
+  br i1 %exitcond.not, label %for.cond.cleanup3, label %for.body4
+
+for.cond.cleanup3:
+  %arrayidx5 = getelementptr inbounds nuw i64, ptr %t, i64 %i.018
+  %0 = load i64, ptr %arrayidx5, align 8, !llvm.access.group !9
+  %arrayidx6 = getelementptr inbounds nuw i64, ptr %A, i64 %i.018
+  store i64 %0, ptr %arrayidx6, align 8, !llvm.access.group !9
+  %add8 = add nuw nsw i64 %i.018, 1
+  %exitcond19.not = icmp eq i64 %add8, %n
+  br i1 %exitcond19.not, label %for.cond.cleanup, label %for.body, !llvm.loop !10
+
+for.cond.cleanup:
+  ret void
+}
+
+!9 = distinct !{}
+!10 = distinct !{!10, !11}
+!11 = !{!"llvm.loop.parallel_accesses", !9}
+
+; CHECK: Loop info for function 'func':
+; CHECK-NOT: Parallel Loop at depth 1 containing:



More information about the llvm-commits mailing list