[llvm] Prevent LCSSA from creating phi operands for lifetime intrinsics. (PR #172929)
Chang Lin via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 11 00:55:22 PDT 2026
https://github.com/clin111 updated https://github.com/llvm/llvm-project/pull/172929
>From f4e8366b9a085da7bc3bd1eeb4749141cfb39da0 Mon Sep 17 00:00:00 2001
From: Chang-Sun Lin Jr <chang-sun.lin.jr at intel.com>
Date: Thu, 18 Dec 2025 15:09:45 -0800
Subject: [PATCH 1/5] If an alloca has a lifetime intrinsic User outside of a
loop, preserve LCSSA by removing all the lifetime intrinsics for the alloca.
This prevents a problematic phi operand from being created for the lifetime intrinsic, and avoids potential stack coloring issues.
---
llvm/lib/Transforms/Utils/LCSSA.cpp | 33 +++++++++++++++++++
.../Transforms/LCSSA/lifetime-intrinsic.ll | 30 +++++++++++++++++
2 files changed, 63 insertions(+)
create mode 100644 llvm/test/Transforms/LCSSA/lifetime-intrinsic.ll
diff --git a/llvm/lib/Transforms/Utils/LCSSA.cpp b/llvm/lib/Transforms/Utils/LCSSA.cpp
index a9e08ada82ca0..7a6c1b6b2c1f4 100644
--- a/llvm/lib/Transforms/Utils/LCSSA.cpp
+++ b/llvm/lib/Transforms/Utils/LCSSA.cpp
@@ -41,6 +41,7 @@
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/PredIteratorCache.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
@@ -75,6 +76,36 @@ static bool isExitBlock(BasicBlock *BB,
// expensive, and we're not mutating the loop structure.
using LoopExitBlocksTy = SmallDenseMap<Loop *, SmallVector<BasicBlock *, 1>>;
+// If I is an alloca with lifetime intrinsics that are live out of the loop,
+// remove all the lifetime intrinsics for I.
+// This ensures we don't create a lifetime intrinsic based on an LCSSA phi,
+// and avoids potential lifetime inconsistencies.
+static void fixLifetimeIntrinsics(Instruction *I,
+ SmallVectorImpl<Use *> &UsesToRewrite) {
+ if (!isa<AllocaInst>(I))
+ return;
+ bool RemovedAny = false;
+ // First, remove lifetime intrinsics from UsesToRewrite.
+ llvm::erase_if(UsesToRewrite, [&](Use *U) {
+ if (auto *II = dyn_cast<IntrinsicInst>(U->getUser())) {
+ if (II->isLifetimeStartOrEnd()) {
+ RemovedAny = true;
+ return true;
+ }
+ }
+ return false;
+ });
+
+ // Ensure consistency in the simplest way, by removing all lifetime uses
+ // of I.
+ if (RemovedAny) {
+ for (auto *U : make_early_inc_range(I->users()))
+ if (auto *II = dyn_cast<IntrinsicInst>(U))
+ if (II->isLifetimeStartOrEnd())
+ II->eraseFromParent();
+ }
+}
+
/// For every instruction from the worklist, check to see if it has any uses
/// that are outside the current loop. If so, insert LCSSA PHI nodes and
/// rewrite the uses.
@@ -126,6 +157,8 @@ formLCSSAForInstructionsImpl(SmallVectorImpl<Instruction *> &Worklist,
UsesToRewrite.push_back(&U);
}
+ fixLifetimeIntrinsics(I, UsesToRewrite);
+
// If there are no uses outside the loop, exit with no change.
if (UsesToRewrite.empty())
continue;
diff --git a/llvm/test/Transforms/LCSSA/lifetime-intrinsic.ll b/llvm/test/Transforms/LCSSA/lifetime-intrinsic.ll
new file mode 100644
index 0000000000000..fbdd4f4afa26f
--- /dev/null
+++ b/llvm/test/Transforms/LCSSA/lifetime-intrinsic.ll
@@ -0,0 +1,30 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -passes=lcssa -S < %s | FileCheck %s
+
+; Previously crashing in the verifier, due to LCSSA inserting a phi between
+; the alloca and the lifetime intrinsic.
+; We can instead remove the problematic intrinsic and its corresponding start.
+
+declare void @llvm.lifetime.end.p0(ptr captures(none))
+declare void @llvm.lifetime.start.p0(i64, ptr captures(none))
+
+define fastcc i32 @test() {
+; CHECK-LABEL: define fastcc i32 @test() {
+; CHECK-NEXT: br label %[[BB1:.*]]
+; CHECK: [[BB1]]:
+; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4
+; CHECK-NEXT: br i1 false, label %[[BB2:.*]], label %[[BB1]]
+; CHECK: [[BB2]]:
+; CHECK-NEXT: ret i32 0
+;
+ br label %1
+
+1: ; preds = %1, %0
+ %a = alloca i32, align 4
+ call void @llvm.lifetime.start.p0(i64 4, ptr %a)
+ br i1 false, label %3, label %1
+
+3: ; preds = %1
+ call void @llvm.lifetime.end.p0(ptr %a)
+ ret i32 0
+}
>From 3f5b2b5dd4968494180b07d8539ba7a03aee573d Mon Sep 17 00:00:00 2001
From: Chang-Sun Lin Jr <chang-sun.lin.jr at intel.com>
Date: Wed, 14 Jan 2026 16:18:43 -0800
Subject: [PATCH 2/5] Alternative version of fix. Ignore dangling lifetime
intrinsics in LCSSA. Remove them as needed, from the unroll passes.
---
llvm/include/llvm/Analysis/LoopInfo.h | 13 +--
.../include/llvm/Transforms/Utils/LoopUtils.h | 6 ++
llvm/lib/Analysis/LoopInfo.cpp | 11 ++-
.../lib/Transforms/Scalar/LoopSimplifyCFG.cpp | 16 ++--
llvm/lib/Transforms/Utils/LCSSA.cpp | 81 +++++++++++--------
llvm/lib/Transforms/Utils/LoopPeel.cpp | 7 ++
llvm/lib/Transforms/Utils/LoopUnroll.cpp | 7 ++
.../lib/Transforms/Utils/LoopUnrollAndJam.cpp | 5 ++
.../Transforms/LCSSA/lifetime-intrinsic.ll | 8 +-
.../Transforms/LoopUnroll/unroll-alloca.ll | 59 ++++++++++++++
.../LoopUnrollAndJam/unroll-and-jam-alloca.ll | 36 +++++++++
11 files changed, 199 insertions(+), 50 deletions(-)
create mode 100644 llvm/test/Transforms/LoopUnroll/unroll-alloca.ll
create mode 100644 llvm/test/Transforms/LoopUnrollAndJam/unroll-and-jam-alloca.ll
diff --git a/llvm/include/llvm/Analysis/LoopInfo.h b/llvm/include/llvm/Analysis/LoopInfo.h
index 0ecb1141dc1be..cf8691e913eea 100644
--- a/llvm/include/llvm/Analysis/LoopInfo.h
+++ b/llvm/include/llvm/Analysis/LoopInfo.h
@@ -310,15 +310,16 @@ class LLVM_ABI Loop : public LoopBase<BasicBlock, Loop> {
/// by one each time through the loop.
bool isCanonical(ScalarEvolution &SE) const;
- /// Return true if the Loop is in LCSSA form. If \p IgnoreTokens is set to
- /// true, token values defined inside loop are allowed to violate LCSSA form.
- bool isLCSSAForm(const DominatorTree &DT, bool IgnoreTokens = true) const;
+ /// Return true if the Loop is in LCSSA form. If \p IgnoreEphemerals is set to
+ /// true, token and lifetime uses outside the loop are allowed to violate
+ /// LCSSA form.
+ bool isLCSSAForm(const DominatorTree &DT, bool IgnoreEphemerals = true) const;
/// Return true if this Loop and all inner subloops are in LCSSA form. If \p
- /// IgnoreTokens is set to true, token values defined inside loop are allowed
- /// to violate LCSSA form.
+ /// IgnoreEphemerals is set to true, token and lifetime uses outside the loop
+ /// are allowed to violate LCSSA form.
bool isRecursivelyLCSSAForm(const DominatorTree &DT, const LoopInfo &LI,
- bool IgnoreTokens = true) const;
+ bool IgnoreEphemerals = true) const;
/// Return true if the Loop is in the form that the LoopSimplify form
/// transforms loops to, which is sometimes called normal form.
diff --git a/llvm/include/llvm/Transforms/Utils/LoopUtils.h b/llvm/include/llvm/Transforms/Utils/LoopUtils.h
index 0afba21dfaf81..17f85487e1cd2 100644
--- a/llvm/include/llvm/Transforms/Utils/LoopUtils.h
+++ b/llvm/include/llvm/Transforms/Utils/LoopUtils.h
@@ -66,6 +66,12 @@ LLVM_ABI bool formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI,
MemorySSAUpdater *MSSAU,
bool PreserveLCSSA);
+/// Ensure strict LCSSA form for the given loop, by removing lifetime
+/// intrinsics that are used outside the loop.
+///
+/// Returns true if any modifications are made.
+LLVM_ABI bool cleanupDanglingLifetimeUsers(Loop *L, const DominatorTree &DT);
+
/// Ensures LCSSA form for every instruction from the Worklist in the scope of
/// innermost containing loop.
///
diff --git a/llvm/lib/Analysis/LoopInfo.cpp b/llvm/lib/Analysis/LoopInfo.cpp
index d84721b7f8f4b..5a7fc8862a724 100644
--- a/llvm/lib/Analysis/LoopInfo.cpp
+++ b/llvm/lib/Analysis/LoopInfo.cpp
@@ -430,12 +430,12 @@ bool Loop::isCanonical(ScalarEvolution &SE) const {
// Check that 'BB' doesn't have any uses outside of the 'L'
static bool isBlockInLCSSAForm(const Loop &L, const BasicBlock &BB,
- const DominatorTree &DT, bool IgnoreTokens) {
+ const DominatorTree &DT, bool IgnoreEphemerals) {
for (const Instruction &I : BB) {
// Tokens can't be used in PHI nodes and live-out tokens prevent loop
// optimizations, so for the purposes of considered LCSSA form, we
// can ignore them.
- if (IgnoreTokens && I.getType()->isTokenTy())
+ if (IgnoreEphemerals && I.getType()->isTokenTy())
continue;
for (const Use &U : I.uses()) {
@@ -448,6 +448,13 @@ static bool isBlockInLCSSAForm(const Loop &L, const BasicBlock &BB,
if (const PHINode *P = dyn_cast<PHINode>(UI))
UserBB = P->getIncomingBlock(U);
+ // lifetime intrinsics are also considered to be non-uses for
+ // the purposes of LCSSA form.
+ if (IgnoreEphemerals)
+ if (auto *II = dyn_cast<IntrinsicInst>(UI))
+ if (II->isLifetimeStartOrEnd())
+ continue;
+
// Check the current block, as a fast-path, before checking whether
// the use is anywhere in the loop. Most values are used in the same
// block they are defined in. Also, blocks not reachable from the
diff --git a/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp b/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
index e902b71776973..a0b66cd59b968 100644
--- a/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
@@ -608,15 +608,17 @@ class ConstantTerminatorFoldingImpl {
return false;
}
- // TODO: Tokens may breach LCSSA form by default. However, the transform for
- // dead exit blocks requires LCSSA form to be maintained for all values,
- // tokens included, otherwise it may break use-def dominance (see PR56243).
- if (!DeadExitBlocks.empty() && !L.isLCSSAForm(DT, /*IgnoreTokens*/ false)) {
- assert(L.isLCSSAForm(DT, /*IgnoreTokens*/ true) &&
- "LCSSA broken not by tokens?");
+ // TODO: Tokens/lifetime intrinsics may breach LCSSA form by default.
+ // However, the transform for dead exit blocks requires LCSSA form to be
+ // maintained for all values, tokens included, otherwise it may break
+ // use-def dominance (see PR56243).
+ if (!DeadExitBlocks.empty() &&
+ !L.isLCSSAForm(DT, /*IgnoreEphemerals*/ false)) {
+ assert(L.isLCSSAForm(DT, /*IgnoreEphemerals*/ true) && "LCSSA broken?");
LLVM_DEBUG(dbgs() << "Give up constant terminator folding in loop "
<< Header->getName()
- << ": tokens uses potentially break LCSSA form.\n");
+ << ": Tokens/lifetime markers potentially break "
+ << " LCSSA form.\n");
return false;
}
diff --git a/llvm/lib/Transforms/Utils/LCSSA.cpp b/llvm/lib/Transforms/Utils/LCSSA.cpp
index 7a6c1b6b2c1f4..eb543c6dfcce2 100644
--- a/llvm/lib/Transforms/Utils/LCSSA.cpp
+++ b/llvm/lib/Transforms/Utils/LCSSA.cpp
@@ -76,36 +76,6 @@ static bool isExitBlock(BasicBlock *BB,
// expensive, and we're not mutating the loop structure.
using LoopExitBlocksTy = SmallDenseMap<Loop *, SmallVector<BasicBlock *, 1>>;
-// If I is an alloca with lifetime intrinsics that are live out of the loop,
-// remove all the lifetime intrinsics for I.
-// This ensures we don't create a lifetime intrinsic based on an LCSSA phi,
-// and avoids potential lifetime inconsistencies.
-static void fixLifetimeIntrinsics(Instruction *I,
- SmallVectorImpl<Use *> &UsesToRewrite) {
- if (!isa<AllocaInst>(I))
- return;
- bool RemovedAny = false;
- // First, remove lifetime intrinsics from UsesToRewrite.
- llvm::erase_if(UsesToRewrite, [&](Use *U) {
- if (auto *II = dyn_cast<IntrinsicInst>(U->getUser())) {
- if (II->isLifetimeStartOrEnd()) {
- RemovedAny = true;
- return true;
- }
- }
- return false;
- });
-
- // Ensure consistency in the simplest way, by removing all lifetime uses
- // of I.
- if (RemovedAny) {
- for (auto *U : make_early_inc_range(I->users()))
- if (auto *II = dyn_cast<IntrinsicInst>(U))
- if (II->isLifetimeStartOrEnd())
- II->eraseFromParent();
- }
-}
-
/// For every instruction from the worklist, check to see if it has any uses
/// that are outside the current loop. If so, insert LCSSA PHI nodes and
/// rewrite the uses.
@@ -147,6 +117,13 @@ formLCSSAForInstructionsImpl(SmallVectorImpl<Instruction *> &Worklist,
continue;
}
+ // Ignore lifetime intrinsics, instead of creating LCSSA phis for them.
+ // The intrinsics can be removed later by a call to
+ // cleanupDanglingLifetimeUsers.
+ if (auto *II = dyn_cast<IntrinsicInst>(User))
+ if (II->isLifetimeStartOrEnd())
+ continue;
+
// For practical purposes, we consider that the use in a PHI
// occurs in the respective predecessor block. For more info,
// see the `phi` doc in LangRef and the LCSSA doc.
@@ -157,8 +134,6 @@ formLCSSAForInstructionsImpl(SmallVectorImpl<Instruction *> &Worklist,
UsesToRewrite.push_back(&U);
}
- fixLifetimeIntrinsics(I, UsesToRewrite);
-
// If there are no uses outside the loop, exit with no change.
if (UsesToRewrite.empty())
continue;
@@ -392,6 +367,48 @@ static void computeBlocksDominatingExits(
}
}
+/// Ensure strict LCSSA form for the given loop, by removing lifetime
+/// intrinsics that are used outside the loop.
+///
+/// Returns true if any modifications are made.
+bool llvm::cleanupDanglingLifetimeUsers(Loop *L, const DominatorTree &DT) {
+ SmallVector<BasicBlock *, 8> ExitBlocks;
+ L->getExitBlocks(ExitBlocks);
+
+ if (ExitBlocks.empty())
+ return false;
+
+ // Look only at allocas that dominate the loop exits.
+ SmallSetVector<BasicBlock *, 8> BlocksDominatingExits;
+ computeBlocksDominatingExits(*L, DT, ExitBlocks, BlocksDominatingExits);
+
+ SmallVector<Instruction *, 8> ToRemove;
+ bool Changed = false;
+
+ for (auto *BB : BlocksDominatingExits) {
+ for (auto &I : *BB) {
+ if (auto *AI = dyn_cast<AllocaInst>(&I)) {
+ for (Use &U : AI->uses()) {
+ auto *User = cast<Instruction>(U.getUser());
+
+ if (L->contains(User->getParent()))
+ continue;
+
+ if (User->isLifetimeStartOrEnd())
+ ToRemove.push_back(User);
+ }
+ }
+ }
+ }
+
+ for (Instruction *I : ToRemove) {
+ I->eraseFromParent();
+ Changed = true;
+ }
+
+ return Changed;
+}
+
static bool formLCSSAImpl(Loop &L, const DominatorTree &DT, const LoopInfo *LI,
ScalarEvolution *SE,
LoopExitBlocksTy &LoopExitBlocks) {
diff --git a/llvm/lib/Transforms/Utils/LoopPeel.cpp b/llvm/lib/Transforms/Utils/LoopPeel.cpp
index 960ec9d4c7d6e..50a6723541991 100644
--- a/llvm/lib/Transforms/Utils/LoopPeel.cpp
+++ b/llvm/lib/Transforms/Utils/LoopPeel.cpp
@@ -1117,6 +1117,13 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI,
SmallVector<std::pair<BasicBlock *, BasicBlock *>, 4> ExitEdges;
L->getExitEdges(ExitEdges);
+ // LCSSA allows lifetime intrinsics and tokens to directly use loop
+ // instructions, as they cannot use a phi.
+ // Cloning loop blocks requires a phi join; just remove the problematic
+ // instructions.
+ if (cleanupDanglingLifetimeUsers(L, DT))
+ LLVM_DEBUG(dbgs() << "Peeling: removed dangling lifetime users.\n");
+
// Remember dominators of blocks we might reach through exits to change them
// later. Immediate dominator of such block might change, because we add more
// routes which can lead to the exit: we can reach it from the peeled
diff --git a/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/llvm/lib/Transforms/Utils/LoopUnroll.cpp
index 0f256398e5b1e..cc9b7579f7f19 100644
--- a/llvm/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUnroll.cpp
@@ -555,6 +555,13 @@ llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
if (CompletelyUnroll)
ULO.Runtime = false;
+ // LCSSA allows lifetime intrinsics and tokens to directly use loop
+ // instructions, as they cannot use a phi.
+ // Cloning loop blocks requires a phi join; just remove the problematic
+ // instructions.
+ if (cleanupDanglingLifetimeUsers(L, *DT))
+ LLVM_DEBUG(dbgs() << "Unroll: removed dangling lifetime users.\n");
+
// Go through all exits of L and see if there are any phi-nodes there. We just
// conservatively assume that they're inserted to preserve LCSSA form, which
// means that complete unrolling might break this form. We need to either fix
diff --git a/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp b/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp
index 1e614bd29ee6e..9b2fb626c857f 100644
--- a/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp
@@ -256,6 +256,11 @@ llvm::UnrollAndJamLoop(Loop *L, unsigned Count, unsigned TripCount,
SE->forgetBlockAndLoopDispositions();
}
+ // Remove problematic lifetime intrinsics outside the loop.
+ // Avoids the need to merge allocas with a phi.
+ if (cleanupDanglingLifetimeUsers(L, *DT))
+ LLVM_DEBUG(dbgs() << "U&J: removed dangling lifetime users.\n");
+
using namespace ore;
// Report the unrolling decision.
if (CompletelyUnroll) {
diff --git a/llvm/test/Transforms/LCSSA/lifetime-intrinsic.ll b/llvm/test/Transforms/LCSSA/lifetime-intrinsic.ll
index fbdd4f4afa26f..18165e9a7664d 100644
--- a/llvm/test/Transforms/LCSSA/lifetime-intrinsic.ll
+++ b/llvm/test/Transforms/LCSSA/lifetime-intrinsic.ll
@@ -1,9 +1,9 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt -passes=lcssa -S < %s | FileCheck %s
-; Previously crashing in the verifier, due to LCSSA inserting a phi between
-; the alloca and the lifetime intrinsic.
-; We can instead remove the problematic intrinsic and its corresponding start.
+; LCSSA should ignore the lifetime intrinsics, as we cannot create phis
+; for them. If the client needs strict LCSSA form, they can call
+; cleanupDanglingLifetimeUsers afterwards.
declare void @llvm.lifetime.end.p0(ptr captures(none))
declare void @llvm.lifetime.start.p0(i64, ptr captures(none))
@@ -13,8 +13,10 @@ define fastcc i32 @test() {
; CHECK-NEXT: br label %[[BB1:.*]]
; CHECK: [[BB1]]:
; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr [[A]])
; CHECK-NEXT: br i1 false, label %[[BB2:.*]], label %[[BB1]]
; CHECK: [[BB2]]:
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(ptr [[A]])
; CHECK-NEXT: ret i32 0
;
br label %1
diff --git a/llvm/test/Transforms/LoopUnroll/unroll-alloca.ll b/llvm/test/Transforms/LoopUnroll/unroll-alloca.ll
new file mode 100644
index 0000000000000..10e8238a510fd
--- /dev/null
+++ b/llvm/test/Transforms/LoopUnroll/unroll-alloca.ll
@@ -0,0 +1,59 @@
+; RUN: opt -unroll-allow-partial -unroll-runtime -passes="loop-unroll<O3>" -S < %s | FileCheck %s
+
+; Various scenarios where a loop has an alloca with a live-out lifetime use.
+; LCSSA can't help here because a lifetime marker can't have a phi
+; definition.
+; Remove the lifetime marker(s) before unrolling, to prevent multiple defs
+; and a single use.
+
+; CHECK-LABEL: @peelit
+; CHECK: alloca i32
+; CHECK-NOT: call{{.*}}@llvm.lifetime
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
+declare void @llvm.lifetime.start.p0(ptr captures(none)) #0
+declare void @llvm.lifetime.end.p0(ptr captures(none)) #0
+
+define fastcc i32 @peelit() {
+bb:
+ br label %bb1
+
+bb1: ; preds = %bb1, %bb
+ %phi = phi i64 [ 1, %bb ], [ 0, %bb1 ]
+ %alloca = alloca i32, align 4
+ br i1 false, label %bb2, label %bb1
+
+bb2: ; preds = %bb1
+ call void @llvm.lifetime.start.p0(ptr %alloca)
+ unreachable
+}
+
+; CHECK-LABEL: @partial
+; CHECK: call{{.*}}umax
+; CHECK: alloca i32
+; CHECK-NOT: call{{.*}}@llvm.lifetime.end
+
+define fastcc i32 @partial(i32 %max) {
+bb:
+ br label %bb1
+
+bb1: ; preds = %bb1, %bb
+ %phic = phi i32 [ 0, %bb ], [ %next, %bb1 ]
+ %alloca = alloca i32, align 8
+ call void @llvm.lifetime.start.p0(ptr %alloca)
+ store i32 %phic, ptr %alloca, align 8
+ %next = add i32 %phic, 1
+ %cmp = icmp ult i32 %next, %max
+ br i1 %cmp, label %bb1, label %bb2
+
+bb2: ; preds = %bb1
+ call void @llvm.lifetime.end.p0(ptr %alloca)
+ ret i32 %phic
+}
+
+; full unroll is OK because there's no merge point.
+
+attributes #0 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
diff --git a/llvm/test/Transforms/LoopUnrollAndJam/unroll-and-jam-alloca.ll b/llvm/test/Transforms/LoopUnrollAndJam/unroll-and-jam-alloca.ll
new file mode 100644
index 0000000000000..deda42b735971
--- /dev/null
+++ b/llvm/test/Transforms/LoopUnrollAndJam/unroll-and-jam-alloca.ll
@@ -0,0 +1,36 @@
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+; RUN: opt -passes=loop-unroll-and-jam -allow-unroll-and-jam -unroll-and-jam-count=4 -unroll-remainder < %s -S | FileCheck %s
+
+; The alloca and lifetime use are problematic here, because the unroll
+; duplication will require a merge. A lifetime intrinsic can't have a
+; phi definition. The lifetime intrinsic can be removed.
+
+; CHECK: [[xtraiter:%.+]] = and i32 {{.*}}, 3
+; CHECK-NOT: call{{.*}}@llvm.lifetime
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
+declare void @llvm.lifetime.start.p0(ptr captures(none)) #0
+
+define void @test1(i32 %I) {
+entry:
+ br label %for.outer
+
+for.outer: ; preds = %for.latch, %entry
+ %i = phi i32 [ %add8, %for.latch ], [ 0, %entry ]
+ br label %for.inner
+
+for.inner: ; preds = %for.inner, %for.outer
+ %alloca = alloca i32, align 4
+ br i1 true, label %for.latch, label %for.inner
+
+for.latch: ; preds = %for.inner
+ %add8 = add i32 %i, 1
+ %exitcond25 = icmp eq i32 %i, %I
+ br i1 %exitcond25, label %for.end.loopexit, label %for.outer
+
+for.end.loopexit: ; preds = %for.latch
+ call void @llvm.lifetime.start.p0(ptr %alloca)
+ ret void
+}
+
+attributes #0 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
>From db38aa81df9889ac145d5a5abd4c1d63b449554d Mon Sep 17 00:00:00 2001
From: Chang-Sun Lin Jr <chang-sun.lin.jr at intel.com>
Date: Wed, 14 Jan 2026 23:33:45 -0800
Subject: [PATCH 3/5] Fix up vectorization pass also.
---
.../Transforms/Vectorize/LoopVectorize.cpp | 5 ++
.../LoopVectorize/alloca-lifetime.ll | 59 +++++++++++++++++++
2 files changed, 64 insertions(+)
create mode 100644 llvm/test/Transforms/LoopVectorize/alloca-lifetime.ll
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 3f1e12e5d1cd0..8451d446f60d5 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -10183,6 +10183,11 @@ LoopVectorizeResult LoopVectorizePass::runImpl(Function &F) {
// transform.
Changed |= formLCSSARecursively(*L, *DT, LI, SE);
+ // Remove problematic lifetime intrinsics outside the loop.
+ // Avoids the need to merge allocas with a phi.
+ if (cleanupDanglingLifetimeUsers(L, *DT))
+ LLVM_DEBUG(dbgs() << "Vectorizer: removed dangling lifetime users.\n");
+
Changed |= CFGChanged |= processLoop(L);
if (Changed) {
diff --git a/llvm/test/Transforms/LoopVectorize/alloca-lifetime.ll b/llvm/test/Transforms/LoopVectorize/alloca-lifetime.ll
new file mode 100644
index 0000000000000..630ef9723ac68
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/alloca-lifetime.ll
@@ -0,0 +1,59 @@
+; RUN: opt < %s -passes=loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -S 2>&1 | FileCheck %s
+
+; The alloca is live-out of the loop. The lifetime intrinsics can be removed
+; before vectorization, to avoid SSA violations.
+
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+
+declare void @llvm.lifetime.start.p0(ptr nocapture) nounwind
+declare void @llvm.lifetime.end.p0(ptr nocapture) nounwind
+
+; CHECK-LABEL: @live_alloca
+; CHECK: 4 x i32
+; CHECK-NOT: call {{.*}} @llvm.lifetime.end
+define i32 @live_alloca(ptr %a, i64 %n) {
+entry:
+ br label %for.body
+
+for.body:
+ %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
+ %r = phi i32 [ %tmp3, %for.body ], [ 0, %entry ]
+ %alloca = alloca i32, align 4
+ call void @llvm.lifetime.start.p0(ptr %alloca)
+ %i.next = add nuw nsw i64 %i, 1
+ %cond = icmp slt i64 %i.next, %n
+ %tmp0 = select i1 %cond, i64 %i.next, i64 0
+ %tmp1 = getelementptr inbounds i32, ptr %a, i64 %tmp0
+ %tmp2 = load i32, ptr %tmp1, align 8
+ %tmp3 = add i32 %r, %tmp2
+ br i1 %cond, label %for.body, label %for.end
+
+for.end:
+ %tmp4 = phi i32 [ %tmp3, %for.body ]
+ call void @llvm.lifetime.end.p0(ptr %alloca)
+ ret i32 %tmp4
+}
+
+; CHECK-LABEL: @live_alloca2
+; CHECK: alloca i32
+; CHECK-NOT: call {{.*}} @llvm.lifetime.end
+define void @live_alloca2(ptr %ptr) {
+entry:
+ br label %loop
+
+loop: ; preds = %loop, %entry
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+ %alloca = alloca i32, align 4
+ call void @llvm.lifetime.start.p0(ptr %alloca)
+ %cond0 = icmp ult i64 %iv, 13
+ %s = select i1 %cond0, i32 10, i32 20
+ %gep = getelementptr inbounds i32, ptr %ptr, i64 %iv
+ store i32 %s, ptr %gep
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond = icmp eq i64 %iv.next, 14
+ br i1 %exitcond, label %exit, label %loop
+
+exit:
+ call void @llvm.lifetime.end.p0(ptr %alloca)
+ ret void
+}
>From 257cf487e01fc9fdeb66fb30e80bede753cec24f Mon Sep 17 00:00:00 2001
From: Chang-Sun Lin Jr <chang-sun.lin.jr at intel.com>
Date: Tue, 20 Jan 2026 15:20:12 -0800
Subject: [PATCH 4/5] Remove all associated lifetime markers when removing one
marker.
---
.../include/llvm/Transforms/Utils/LoopUtils.h | 3 +-
llvm/lib/Transforms/Utils/LCSSA.cpp | 36 +++++++++----------
.../Transforms/LoopUnroll/unroll-alloca.ll | 2 +-
.../LoopVectorize/alloca-lifetime.ll | 7 ++--
4 files changed, 24 insertions(+), 24 deletions(-)
diff --git a/llvm/include/llvm/Transforms/Utils/LoopUtils.h b/llvm/include/llvm/Transforms/Utils/LoopUtils.h
index de6c69e177086..bffdc104be709 100644
--- a/llvm/include/llvm/Transforms/Utils/LoopUtils.h
+++ b/llvm/include/llvm/Transforms/Utils/LoopUtils.h
@@ -66,7 +66,8 @@ LLVM_ABI bool formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI,
bool PreserveLCSSA);
/// Ensure strict LCSSA form for the given loop, by removing lifetime
-/// intrinsics that are used outside the loop.
+/// intrinsics that cross the exit boundary, as well as their associated
+/// partners.
///
/// Returns true if any modifications are made.
LLVM_ABI bool cleanupDanglingLifetimeUsers(Loop *L, const DominatorTree &DT);
diff --git a/llvm/lib/Transforms/Utils/LCSSA.cpp b/llvm/lib/Transforms/Utils/LCSSA.cpp
index eb543c6dfcce2..d047da150f172 100644
--- a/llvm/lib/Transforms/Utils/LCSSA.cpp
+++ b/llvm/lib/Transforms/Utils/LCSSA.cpp
@@ -368,7 +368,8 @@ static void computeBlocksDominatingExits(
}
/// Ensure strict LCSSA form for the given loop, by removing lifetime
-/// intrinsics that are used outside the loop.
+/// intrinsics that cross the exit boundary, as well as their associated
+/// partners.
///
/// Returns true if any modifications are made.
bool llvm::cleanupDanglingLifetimeUsers(Loop *L, const DominatorTree &DT) {
@@ -378,35 +379,34 @@ bool llvm::cleanupDanglingLifetimeUsers(Loop *L, const DominatorTree &DT) {
if (ExitBlocks.empty())
return false;
- // Look only at allocas that dominate the loop exits.
+ // Look only at allocas in the loop, that dominate the loop exits.
SmallSetVector<BasicBlock *, 8> BlocksDominatingExits;
computeBlocksDominatingExits(*L, DT, ExitBlocks, BlocksDominatingExits);
- SmallVector<Instruction *, 8> ToRemove;
- bool Changed = false;
+ SmallVector<AllocaInst *, 8> ToClean;
for (auto *BB : BlocksDominatingExits) {
for (auto &I : *BB) {
if (auto *AI = dyn_cast<AllocaInst>(&I)) {
- for (Use &U : AI->uses()) {
- auto *User = cast<Instruction>(U.getUser());
-
- if (L->contains(User->getParent()))
- continue;
-
- if (User->isLifetimeStartOrEnd())
- ToRemove.push_back(User);
- }
+ bool LifetimeOutsideLoop = llvm::any_of(AI->users(), [&](User *U) {
+ auto *Inst = cast<Instruction>(U);
+ return Inst->isLifetimeStartOrEnd() &&
+ !L->contains(Inst->getParent());
+ });
+
+ if (LifetimeOutsideLoop)
+ ToClean.push_back(AI);
}
}
}
- for (Instruction *I : ToRemove) {
- I->eraseFromParent();
- Changed = true;
- }
+ for (auto *AI : ToClean)
+ for (auto *U : make_early_inc_range(AI->users()))
+ if (auto *II = dyn_cast<IntrinsicInst>(U))
+ if (II->isLifetimeStartOrEnd())
+ II->eraseFromParent();
- return Changed;
+ return !ToClean.empty();
}
static bool formLCSSAImpl(Loop &L, const DominatorTree &DT, const LoopInfo *LI,
diff --git a/llvm/test/Transforms/LoopUnroll/unroll-alloca.ll b/llvm/test/Transforms/LoopUnroll/unroll-alloca.ll
index 10e8238a510fd..db63983e9b598 100644
--- a/llvm/test/Transforms/LoopUnroll/unroll-alloca.ll
+++ b/llvm/test/Transforms/LoopUnroll/unroll-alloca.ll
@@ -34,7 +34,7 @@ bb2: ; preds = %bb1
; CHECK-LABEL: @partial
; CHECK: call{{.*}}umax
; CHECK: alloca i32
-; CHECK-NOT: call{{.*}}@llvm.lifetime.end
+; CHECK-NOT: call{{.*}}@llvm.lifetime
define fastcc i32 @partial(i32 %max) {
bb:
diff --git a/llvm/test/Transforms/LoopVectorize/alloca-lifetime.ll b/llvm/test/Transforms/LoopVectorize/alloca-lifetime.ll
index 630ef9723ac68..c5e19d1ac279d 100644
--- a/llvm/test/Transforms/LoopVectorize/alloca-lifetime.ll
+++ b/llvm/test/Transforms/LoopVectorize/alloca-lifetime.ll
@@ -9,8 +9,8 @@ declare void @llvm.lifetime.start.p0(ptr nocapture) nounwind
declare void @llvm.lifetime.end.p0(ptr nocapture) nounwind
; CHECK-LABEL: @live_alloca
-; CHECK: 4 x i32
-; CHECK-NOT: call {{.*}} @llvm.lifetime.end
+; CHECK: 4 x i64
+; CHECK-NOT: call {{.*}} @llvm.lifetime
define i32 @live_alloca(ptr %a, i64 %n) {
entry:
br label %for.body
@@ -35,8 +35,7 @@ for.end:
}
; CHECK-LABEL: @live_alloca2
-; CHECK: alloca i32
-; CHECK-NOT: call {{.*}} @llvm.lifetime.end
+; CHECK-NOT: call {{.*}} @llvm.lifetime
define void @live_alloca2(ptr %ptr) {
entry:
br label %loop
>From 687d71ba5a935957a7402f7cc2b004440853e991 Mon Sep 17 00:00:00 2001
From: Chang-Sun Lin Jr <chang-sun.lin.jr at intel.com>
Date: Tue, 10 Mar 2026 15:36:31 -0700
Subject: [PATCH 5/5] Fix potential failure: the lifetime intrinsics should only
be removed if LoopUnroll will actually transform the loop (and tell the PM that
analyses are invalidated). Lifetime intrinsics affect MSSA.
---
llvm/lib/Transforms/Utils/LoopUnroll.cpp | 14 +++++++-------
1 file changed, 7 insertions(+), 7 deletions(-)
diff --git a/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/llvm/lib/Transforms/Utils/LoopUnroll.cpp
index 4a429cf8bf8ed..fef5d5151538e 100644
--- a/llvm/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUnroll.cpp
@@ -555,13 +555,6 @@ llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
if (CompletelyUnroll)
ULO.Runtime = false;
- // LCSSA allows lifetime intrinsics and tokens to directly use loop
- // instructions, as they cannot use a phi.
- // Cloning loop blocks requires a phi join; just remove the problematic
- // instructions.
- if (cleanupDanglingLifetimeUsers(L, *DT))
- LLVM_DEBUG(dbgs() << "Unroll: removed dangling lifetime users.\n");
-
// Go through all exits of L and see if there are any phi-nodes there. We just
// conservatively assume that they're inserted to preserve LCSSA form, which
// means that complete unrolling might break this form. We need to either fix
@@ -612,6 +605,13 @@ llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
}
}
+ // LCSSA allows lifetime intrinsics and tokens to directly use loop
+ // instructions, as they cannot use a phi.
+ // Cloning loop blocks requires a phi join; just remove the problematic
+ // instructions.
+ if (cleanupDanglingLifetimeUsers(L, *DT))
+ LLVM_DEBUG(dbgs() << "Unroll: removed dangling lifetime users.\n");
+
using namespace ore;
// Report the unrolling decision.
if (CompletelyUnroll) {
More information about the llvm-commits
mailing list