[llvm] [LoopInfo] Don't recognize loop as parallel if it stores to out-of-loop alloca (PR #180551)
Julius Ikkala via llvm-commits
llvm-commits at lists.llvm.org
Mon Feb 9 07:51:59 PST 2026
https://github.com/juliusikkala created https://github.com/llvm/llvm-project/pull/180551
Fixes #179272. Related discussion on Discourse: https://discourse.llvm.org/t/semantics-of-llvm-loop-parallel-accesses-and-interaction-with-alloca/89714
This is just the first step: it fixes the miscompile by limiting the application of `isAnnotatedParallel()`. Ideally, we'd be able to allow this metadata on loops that use `alloca` as well, but doing that safely will require some special handling in the loop vectorizer.
>From b626e7e07409d093f9f69aeaa204c529fde11bcc Mon Sep 17 00:00:00 2001
From: Julius Ikkala <julius.ikkala at tuni.fi>
Date: Mon, 9 Feb 2026 17:30:54 +0200
Subject: [PATCH] [LoopInfo] Don't recognize loop as parallel if it stores to
out-of-loop alloca
---
llvm/lib/Analysis/LoopInfo.cpp | 15 +++++
.../LoopInfo/annotated-parallel-alloca.ll | 57 +++++++++++++++++++
2 files changed, 72 insertions(+)
create mode 100644 llvm/test/Analysis/LoopInfo/annotated-parallel-alloca.ll
diff --git a/llvm/lib/Analysis/LoopInfo.cpp b/llvm/lib/Analysis/LoopInfo.cpp
index a364b21c64b01..d5203a20c8c6c 100644
--- a/llvm/lib/Analysis/LoopInfo.cpp
+++ b/llvm/lib/Analysis/LoopInfo.cpp
@@ -591,6 +591,21 @@ bool Loop::isAnnotatedParallel() const {
if (!I.mayReadOrWriteMemory())
continue;
+ // If the loop contains a store instruction into an alloca that is outside
+ // of the loop, it is possible that the alloca was initially related to a
+ // loop-local variable but got hoisted outside during e.g. inlining or
+ // some other parallel-loop-unaware pass.
+ //
+ // TODO: Allow metadata to mark 'alloca' as safe to vectorize and
+ // separately handle such allocas in the loop vectorizer, either by
+ // sinking the `alloca` into the loop body or by otherwise "privatizing"
+ // the allocation for each vector lane.
+ if (StoreInst *SI = dyn_cast<StoreInst>(&I)) {
+ AllocaInst *AI = findAllocaForValue(SI->getPointerOperand());
+ if (AI && !contains(AI))
+ return false;
+ }
+
if (MDNode *AccessGroup = I.getMetadata(LLVMContext::MD_access_group)) {
auto ContainsAccessGroup = [&ParallelAccessGroups](MDNode *AG) -> bool {
if (AG->getNumOperands() == 0) {
diff --git a/llvm/test/Analysis/LoopInfo/annotated-parallel-alloca.ll b/llvm/test/Analysis/LoopInfo/annotated-parallel-alloca.ll
new file mode 100644
index 0000000000000..b4e5af07950c6
--- /dev/null
+++ b/llvm/test/Analysis/LoopInfo/annotated-parallel-alloca.ll
@@ -0,0 +1,57 @@
+; RUN: opt -passes='print<loops>' -disable-output %s 2>&1 | FileCheck %s
+;
+; void func(long n, long *A) {
+; #pragma clang loop vectorize(assume_safety)
+; for (long i = 0; i < n; i += 1) {
+; long t[32];
+; for (long j = 0; j < 32; j += 1)
+; t[j] = i;
+; A[i] = t[i];
+; }
+; }
+;
+; The alloca for `t` usually gets hoisted outside of the loop (either by Clang
+; itself, or by an inlining pass if the loop body is in a function, etc.) and
+; gets incorrectly shared between iterations. Check that isAnnotatedParallel is
+; blocking this kind of usage, as it will not get vectorized correctly unless
+; mem2reg converts the array.
+;
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @func(i64 %n, ptr noalias nonnull %A) {
+entry:
+ %t = alloca [32 x i64], align 16
+ %cmp17 = icmp sgt i64 %n, 0
+ br i1 %cmp17, label %for.body, label %for.cond.cleanup
+
+for.body:
+ %i.018 = phi i64 [ %add8, %for.cond.cleanup3 ], [ 0, %entry ]
+ br label %for.body4
+
+for.body4:
+ %j.016 = phi i64 [ 0, %for.body ], [ %add, %for.body4 ]
+ %arrayidx = getelementptr inbounds nuw i64, ptr %t, i64 %j.016
+ store i64 %i.018, ptr %arrayidx, align 8, !llvm.access.group !9
+ %add = add nuw nsw i64 %j.016, 1
+ %exitcond.not = icmp eq i64 %add, 32
+ br i1 %exitcond.not, label %for.cond.cleanup3, label %for.body4
+
+for.cond.cleanup3:
+ %arrayidx5 = getelementptr inbounds nuw i64, ptr %t, i64 %i.018
+ %0 = load i64, ptr %arrayidx5, align 8, !llvm.access.group !9
+ %arrayidx6 = getelementptr inbounds nuw i64, ptr %A, i64 %i.018
+ store i64 %0, ptr %arrayidx6, align 8, !llvm.access.group !9
+ %add8 = add nuw nsw i64 %i.018, 1
+ %exitcond19.not = icmp eq i64 %add8, %n
+ br i1 %exitcond19.not, label %for.cond.cleanup, label %for.body, !llvm.loop !10
+
+for.cond.cleanup:
+ ret void
+}
+
+!9 = distinct !{}
+!10 = distinct !{!10, !11}
+!11 = !{!"llvm.loop.parallel_accesses", !9}
+
+; CHECK: Loop info for function 'func':
+; CHECK-NOT: Parallel Loop at depth 1 containing:
More information about the llvm-commits
mailing list