[llvm] Prevent LCSSA from creating phi operands for lifetime intrinsics. (PR #172929)

Chang Lin via llvm-commits llvm-commits at lists.llvm.org
Wed Mar 11 00:55:22 PDT 2026


https://github.com/clin111 updated https://github.com/llvm/llvm-project/pull/172929

>From f4e8366b9a085da7bc3bd1eeb4749141cfb39da0 Mon Sep 17 00:00:00 2001
From: Chang-Sun Lin Jr <chang-sun.lin.jr at intel.com>
Date: Thu, 18 Dec 2025 15:09:45 -0800
Subject: [PATCH 1/5] If an alloca has a lifetime intrinsic User outside of a
 loop, preserve LCSSA by removing all the lifetime intrinsics for the alloca.

This prevents a problematic phi operand from being created for the lifetime intrinsic, and avoids potential stack coloring issues.
---
 llvm/lib/Transforms/Utils/LCSSA.cpp           | 33 +++++++++++++++++++
 .../Transforms/LCSSA/lifetime-intrinsic.ll    | 30 +++++++++++++++++
 2 files changed, 63 insertions(+)
 create mode 100644 llvm/test/Transforms/LCSSA/lifetime-intrinsic.ll

diff --git a/llvm/lib/Transforms/Utils/LCSSA.cpp b/llvm/lib/Transforms/Utils/LCSSA.cpp
index a9e08ada82ca0..7a6c1b6b2c1f4 100644
--- a/llvm/lib/Transforms/Utils/LCSSA.cpp
+++ b/llvm/lib/Transforms/Utils/LCSSA.cpp
@@ -41,6 +41,7 @@
 #include "llvm/IR/DebugInfo.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/PredIteratorCache.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/Pass.h"
@@ -75,6 +76,36 @@ static bool isExitBlock(BasicBlock *BB,
 // expensive, and we're not mutating the loop structure.
 using LoopExitBlocksTy = SmallDenseMap<Loop *, SmallVector<BasicBlock *, 1>>;
 
+// If I is an alloca with lifetime intrinsics that are live out of the loop,
+// remove all the lifetime intrinsics for I.
+// This ensures we don't create a lifetime intrinsic based on an LCSSA phi,
+// and avoids potential lifetime inconsistencies.
+static void fixLifetimeIntrinsics(Instruction *I,
+                                  SmallVectorImpl<Use *> &UsesToRewrite) {
+  if (!isa<AllocaInst>(I))
+    return;
+  bool RemovedAny = false;
+  // First, remove lifetime intrinsics from UsesToRewrite.
+  llvm::erase_if(UsesToRewrite, [&](Use *U) {
+    if (auto *II = dyn_cast<IntrinsicInst>(U->getUser())) {
+      if (II->isLifetimeStartOrEnd()) {
+        RemovedAny = true;
+        return true;
+      }
+    }
+    return false;
+  });
+
+  // Ensure consistency in the simplest way, by removing all lifetime uses
+  // of I.
+  if (RemovedAny) {
+    for (auto *U : make_early_inc_range(I->users()))
+      if (auto *II = dyn_cast<IntrinsicInst>(U))
+        if (II->isLifetimeStartOrEnd())
+          II->eraseFromParent();
+  }
+}
+
 /// For every instruction from the worklist, check to see if it has any uses
 /// that are outside the current loop.  If so, insert LCSSA PHI nodes and
 /// rewrite the uses.
@@ -126,6 +157,8 @@ formLCSSAForInstructionsImpl(SmallVectorImpl<Instruction *> &Worklist,
         UsesToRewrite.push_back(&U);
     }
 
+    fixLifetimeIntrinsics(I, UsesToRewrite);
+
     // If there are no uses outside the loop, exit with no change.
     if (UsesToRewrite.empty())
       continue;
diff --git a/llvm/test/Transforms/LCSSA/lifetime-intrinsic.ll b/llvm/test/Transforms/LCSSA/lifetime-intrinsic.ll
new file mode 100644
index 0000000000000..fbdd4f4afa26f
--- /dev/null
+++ b/llvm/test/Transforms/LCSSA/lifetime-intrinsic.ll
@@ -0,0 +1,30 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -passes=lcssa -S < %s | FileCheck %s
+
+; Previously crashing in the verifier, due to LCSSA inserting a phi between
+; the alloca and the lifetime intrinsic.
+; We can instead remove the problematic intrinsic and its corresponding start.
+
+declare void @llvm.lifetime.end.p0(ptr captures(none))
+declare void @llvm.lifetime.start.p0(i64, ptr captures(none))
+
+define fastcc i32 @test() {
+; CHECK-LABEL: define fastcc i32 @test() {
+; CHECK-NEXT:    br label %[[BB1:.*]]
+; CHECK:       [[BB1]]:
+; CHECK-NEXT:    [[A:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    br i1 false, label %[[BB2:.*]], label %[[BB1]]
+; CHECK:       [[BB2]]:
+; CHECK-NEXT:    ret i32 0
+;
+  br label %1
+
+1:                                                ; preds = %1, %0
+  %a = alloca i32, align 4
+  call void @llvm.lifetime.start.p0(i64 4, ptr %a)
+  br i1 false, label %3, label %1
+
+3:                                                ; preds = %1
+  call void @llvm.lifetime.end.p0(ptr %a)
+  ret i32 0
+}

>From 3f5b2b5dd4968494180b07d8539ba7a03aee573d Mon Sep 17 00:00:00 2001
From: Chang-Sun Lin Jr <chang-sun.lin.jr at intel.com>
Date: Wed, 14 Jan 2026 16:18:43 -0800
Subject: [PATCH 2/5] Alternative version of the fix: ignore dangling lifetime
 intrinsics in LCSSA, and remove them as needed in the unroll passes.

---
 llvm/include/llvm/Analysis/LoopInfo.h         | 13 +--
 .../include/llvm/Transforms/Utils/LoopUtils.h |  6 ++
 llvm/lib/Analysis/LoopInfo.cpp                | 11 ++-
 .../lib/Transforms/Scalar/LoopSimplifyCFG.cpp | 16 ++--
 llvm/lib/Transforms/Utils/LCSSA.cpp           | 81 +++++++++++--------
 llvm/lib/Transforms/Utils/LoopPeel.cpp        |  7 ++
 llvm/lib/Transforms/Utils/LoopUnroll.cpp      |  7 ++
 .../lib/Transforms/Utils/LoopUnrollAndJam.cpp |  5 ++
 .../Transforms/LCSSA/lifetime-intrinsic.ll    |  8 +-
 .../Transforms/LoopUnroll/unroll-alloca.ll    | 59 ++++++++++++++
 .../LoopUnrollAndJam/unroll-and-jam-alloca.ll | 36 +++++++++
 11 files changed, 199 insertions(+), 50 deletions(-)
 create mode 100644 llvm/test/Transforms/LoopUnroll/unroll-alloca.ll
 create mode 100644 llvm/test/Transforms/LoopUnrollAndJam/unroll-and-jam-alloca.ll

diff --git a/llvm/include/llvm/Analysis/LoopInfo.h b/llvm/include/llvm/Analysis/LoopInfo.h
index 0ecb1141dc1be..cf8691e913eea 100644
--- a/llvm/include/llvm/Analysis/LoopInfo.h
+++ b/llvm/include/llvm/Analysis/LoopInfo.h
@@ -310,15 +310,16 @@ class LLVM_ABI Loop : public LoopBase<BasicBlock, Loop> {
   /// by one each time through the loop.
   bool isCanonical(ScalarEvolution &SE) const;
 
-  /// Return true if the Loop is in LCSSA form. If \p IgnoreTokens is set to
-  /// true, token values defined inside loop are allowed to violate LCSSA form.
-  bool isLCSSAForm(const DominatorTree &DT, bool IgnoreTokens = true) const;
+  /// Return true if the Loop is in LCSSA form. If \p IgnoreEphemerals is set to
+  /// true, token and lifetime uses outside the loop are allowed to violate
+  /// LCSSA form.
+  bool isLCSSAForm(const DominatorTree &DT, bool IgnoreEphemerals = true) const;
 
   /// Return true if this Loop and all inner subloops are in LCSSA form. If \p
-  /// IgnoreTokens is set to true, token values defined inside loop are allowed
-  /// to violate LCSSA form.
+  /// IgnoreEphemerals is set to true, token and lifetime uses outside the loop
+  /// are allowed to violate LCSSA form.
   bool isRecursivelyLCSSAForm(const DominatorTree &DT, const LoopInfo &LI,
-                              bool IgnoreTokens = true) const;
+                              bool IgnoreEphemerals = true) const;
 
   /// Return true if the Loop is in the form that the LoopSimplify form
   /// transforms loops to, which is sometimes called normal form.
diff --git a/llvm/include/llvm/Transforms/Utils/LoopUtils.h b/llvm/include/llvm/Transforms/Utils/LoopUtils.h
index 0afba21dfaf81..17f85487e1cd2 100644
--- a/llvm/include/llvm/Transforms/Utils/LoopUtils.h
+++ b/llvm/include/llvm/Transforms/Utils/LoopUtils.h
@@ -66,6 +66,12 @@ LLVM_ABI bool formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI,
                                       MemorySSAUpdater *MSSAU,
                                       bool PreserveLCSSA);
 
+/// Ensure strict LCSSA form for the given loop, by removing lifetime
+/// intrinsics that are used outside the loop.
+///
+/// Returns true if any modifications are made.
+LLVM_ABI bool cleanupDanglingLifetimeUsers(Loop *L, const DominatorTree &DT);
+
 /// Ensures LCSSA form for every instruction from the Worklist in the scope of
 /// innermost containing loop.
 ///
diff --git a/llvm/lib/Analysis/LoopInfo.cpp b/llvm/lib/Analysis/LoopInfo.cpp
index d84721b7f8f4b..5a7fc8862a724 100644
--- a/llvm/lib/Analysis/LoopInfo.cpp
+++ b/llvm/lib/Analysis/LoopInfo.cpp
@@ -430,12 +430,12 @@ bool Loop::isCanonical(ScalarEvolution &SE) const {
 
 // Check that 'BB' doesn't have any uses outside of the 'L'
 static bool isBlockInLCSSAForm(const Loop &L, const BasicBlock &BB,
-                               const DominatorTree &DT, bool IgnoreTokens) {
+                               const DominatorTree &DT, bool IgnoreEphemerals) {
   for (const Instruction &I : BB) {
     // Tokens can't be used in PHI nodes and live-out tokens prevent loop
     // optimizations, so for the purposes of considered LCSSA form, we
     // can ignore them.
-    if (IgnoreTokens && I.getType()->isTokenTy())
+    if (IgnoreEphemerals && I.getType()->isTokenTy())
       continue;
 
     for (const Use &U : I.uses()) {
@@ -448,6 +448,13 @@ static bool isBlockInLCSSAForm(const Loop &L, const BasicBlock &BB,
       if (const PHINode *P = dyn_cast<PHINode>(UI))
         UserBB = P->getIncomingBlock(U);
 
+      // lifetime intrinsics are also considered to be non-uses for
+      // the purposes of LCSSA form.
+      if (IgnoreEphemerals)
+        if (auto *II = dyn_cast<IntrinsicInst>(UI))
+          if (II->isLifetimeStartOrEnd())
+            continue;
+
       // Check the current block, as a fast-path, before checking whether
       // the use is anywhere in the loop.  Most values are used in the same
       // block they are defined in.  Also, blocks not reachable from the
diff --git a/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp b/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
index e902b71776973..a0b66cd59b968 100644
--- a/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
@@ -608,15 +608,17 @@ class ConstantTerminatorFoldingImpl {
       return false;
     }
 
-    // TODO: Tokens may breach LCSSA form by default. However, the transform for
-    // dead exit blocks requires LCSSA form to be maintained for all values,
-    // tokens included, otherwise it may break use-def dominance (see PR56243).
-    if (!DeadExitBlocks.empty() && !L.isLCSSAForm(DT, /*IgnoreTokens*/ false)) {
-      assert(L.isLCSSAForm(DT, /*IgnoreTokens*/ true) &&
-             "LCSSA broken not by tokens?");
+    // TODO: Tokens/lifetime intrinsics may breach LCSSA form by default.
+    // However, the transform for dead exit blocks requires LCSSA form to be
+    // maintained for all values, tokens included, otherwise it may break
+    // use-def dominance (see PR56243).
+    if (!DeadExitBlocks.empty() &&
+        !L.isLCSSAForm(DT, /*IgnoreEphemerals*/ false)) {
+      assert(L.isLCSSAForm(DT, /*IgnoreEphemerals*/ true) && "LCSSA broken?");
       LLVM_DEBUG(dbgs() << "Give up constant terminator folding in loop "
                         << Header->getName()
-                        << ": tokens uses potentially break LCSSA form.\n");
+                        << ": Tokens/lifetime markers potentially break "
+                        << " LCSSA form.\n");
       return false;
     }
 
diff --git a/llvm/lib/Transforms/Utils/LCSSA.cpp b/llvm/lib/Transforms/Utils/LCSSA.cpp
index 7a6c1b6b2c1f4..eb543c6dfcce2 100644
--- a/llvm/lib/Transforms/Utils/LCSSA.cpp
+++ b/llvm/lib/Transforms/Utils/LCSSA.cpp
@@ -76,36 +76,6 @@ static bool isExitBlock(BasicBlock *BB,
 // expensive, and we're not mutating the loop structure.
 using LoopExitBlocksTy = SmallDenseMap<Loop *, SmallVector<BasicBlock *, 1>>;
 
-// If I is an alloca with lifetime intrinsics that are live out of the loop,
-// remove all the lifetime intrinsics for I.
-// This ensures we don't create a lifetime intrinsic based on an LCSSA phi,
-// and avoids potential lifetime inconsistencies.
-static void fixLifetimeIntrinsics(Instruction *I,
-                                  SmallVectorImpl<Use *> &UsesToRewrite) {
-  if (!isa<AllocaInst>(I))
-    return;
-  bool RemovedAny = false;
-  // First, remove lifetime intrinsics from UsesToRewrite.
-  llvm::erase_if(UsesToRewrite, [&](Use *U) {
-    if (auto *II = dyn_cast<IntrinsicInst>(U->getUser())) {
-      if (II->isLifetimeStartOrEnd()) {
-        RemovedAny = true;
-        return true;
-      }
-    }
-    return false;
-  });
-
-  // Ensure consistency in the simplest way, by removing all lifetime uses
-  // of I.
-  if (RemovedAny) {
-    for (auto *U : make_early_inc_range(I->users()))
-      if (auto *II = dyn_cast<IntrinsicInst>(U))
-        if (II->isLifetimeStartOrEnd())
-          II->eraseFromParent();
-  }
-}
-
 /// For every instruction from the worklist, check to see if it has any uses
 /// that are outside the current loop.  If so, insert LCSSA PHI nodes and
 /// rewrite the uses.
@@ -147,6 +117,13 @@ formLCSSAForInstructionsImpl(SmallVectorImpl<Instruction *> &Worklist,
         continue;
       }
 
+      // Ignore lifetime intrinsics, instead of creating LCSSA phis for them.
+      // The intrinsics can be removed later by a call to
+      // cleanupDanglingLifetimeUsers.
+      if (auto *II = dyn_cast<IntrinsicInst>(User))
+        if (II->isLifetimeStartOrEnd())
+          continue;
+
       // For practical purposes, we consider that the use in a PHI
       // occurs in the respective predecessor block. For more info,
       // see the `phi` doc in LangRef and the LCSSA doc.
@@ -157,8 +134,6 @@ formLCSSAForInstructionsImpl(SmallVectorImpl<Instruction *> &Worklist,
         UsesToRewrite.push_back(&U);
     }
 
-    fixLifetimeIntrinsics(I, UsesToRewrite);
-
     // If there are no uses outside the loop, exit with no change.
     if (UsesToRewrite.empty())
       continue;
@@ -392,6 +367,48 @@ static void computeBlocksDominatingExits(
   }
 }
 
+/// Ensure strict LCSSA form for the given loop, by removing lifetime
+/// intrinsics that are used outside the loop.
+///
+/// Returns true if any modifications are made.
+bool llvm::cleanupDanglingLifetimeUsers(Loop *L, const DominatorTree &DT) {
+  SmallVector<BasicBlock *, 8> ExitBlocks;
+  L->getExitBlocks(ExitBlocks);
+
+  if (ExitBlocks.empty())
+    return false;
+
+  // Look only at allocas that dominate the loop exits.
+  SmallSetVector<BasicBlock *, 8> BlocksDominatingExits;
+  computeBlocksDominatingExits(*L, DT, ExitBlocks, BlocksDominatingExits);
+
+  SmallVector<Instruction *, 8> ToRemove;
+  bool Changed = false;
+
+  for (auto *BB : BlocksDominatingExits) {
+    for (auto &I : *BB) {
+      if (auto *AI = dyn_cast<AllocaInst>(&I)) {
+        for (Use &U : AI->uses()) {
+          auto *User = cast<Instruction>(U.getUser());
+
+          if (L->contains(User->getParent()))
+            continue;
+
+          if (User->isLifetimeStartOrEnd())
+            ToRemove.push_back(User);
+        }
+      }
+    }
+  }
+
+  for (Instruction *I : ToRemove) {
+    I->eraseFromParent();
+    Changed = true;
+  }
+
+  return Changed;
+}
+
 static bool formLCSSAImpl(Loop &L, const DominatorTree &DT, const LoopInfo *LI,
                           ScalarEvolution *SE,
                           LoopExitBlocksTy &LoopExitBlocks) {
diff --git a/llvm/lib/Transforms/Utils/LoopPeel.cpp b/llvm/lib/Transforms/Utils/LoopPeel.cpp
index 960ec9d4c7d6e..50a6723541991 100644
--- a/llvm/lib/Transforms/Utils/LoopPeel.cpp
+++ b/llvm/lib/Transforms/Utils/LoopPeel.cpp
@@ -1117,6 +1117,13 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, bool PeelLast, LoopInfo *LI,
   SmallVector<std::pair<BasicBlock *, BasicBlock *>, 4> ExitEdges;
   L->getExitEdges(ExitEdges);
 
+  // LCSSA allows lifetime intrinsics and tokens to directly use loop
+  // instructions, as they cannot use a phi.
+  // Cloning loop blocks requires a phi join; just remove the problematic
+  // instructions.
+  if (cleanupDanglingLifetimeUsers(L, DT))
+    LLVM_DEBUG(dbgs() << "Peeling: removed dangling lifetime users.\n");
+
   // Remember dominators of blocks we might reach through exits to change them
   // later. Immediate dominator of such block might change, because we add more
   // routes which can lead to the exit: we can reach it from the peeled
diff --git a/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/llvm/lib/Transforms/Utils/LoopUnroll.cpp
index 0f256398e5b1e..cc9b7579f7f19 100644
--- a/llvm/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUnroll.cpp
@@ -555,6 +555,13 @@ llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
   if (CompletelyUnroll)
     ULO.Runtime = false;
 
+  // LCSSA allows lifetime intrinsics and tokens to directly use loop
+  // instructions, as they cannot use a phi.
+  // Cloning loop blocks requires a phi join; just remove the problematic
+  // instructions.
+  if (cleanupDanglingLifetimeUsers(L, *DT))
+    LLVM_DEBUG(dbgs() << "Unroll: removed dangling lifetime users.\n");
+
   // Go through all exits of L and see if there are any phi-nodes there. We just
   // conservatively assume that they're inserted to preserve LCSSA form, which
   // means that complete unrolling might break this form. We need to either fix
diff --git a/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp b/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp
index 1e614bd29ee6e..9b2fb626c857f 100644
--- a/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp
@@ -256,6 +256,11 @@ llvm::UnrollAndJamLoop(Loop *L, unsigned Count, unsigned TripCount,
     SE->forgetBlockAndLoopDispositions();
   }
 
+  // Remove problematic lifetime intrinsics outside the loop.
+  // Avoids the need to merge allocas with a phi.
+  if (cleanupDanglingLifetimeUsers(L, *DT))
+    LLVM_DEBUG(dbgs() << "U&J: removed dangling lifetime users.\n");
+
   using namespace ore;
   // Report the unrolling decision.
   if (CompletelyUnroll) {
diff --git a/llvm/test/Transforms/LCSSA/lifetime-intrinsic.ll b/llvm/test/Transforms/LCSSA/lifetime-intrinsic.ll
index fbdd4f4afa26f..18165e9a7664d 100644
--- a/llvm/test/Transforms/LCSSA/lifetime-intrinsic.ll
+++ b/llvm/test/Transforms/LCSSA/lifetime-intrinsic.ll
@@ -1,9 +1,9 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
 ; RUN: opt -passes=lcssa -S < %s | FileCheck %s
 
-; Previously crashing in the verifier, due to LCSSA inserting a phi between
-; the alloca and the lifetime intrinsic.
-; We can instead remove the problematic intrinsic and its corresponding start.
+; LCSSA should ignore the lifetime intrinsics, as we cannot create phis
+; for them. If the client needs strict LCSSA form, they can call
+; cleanupDanglingLifetimeUsers afterwards.
 
 declare void @llvm.lifetime.end.p0(ptr captures(none))
 declare void @llvm.lifetime.start.p0(i64, ptr captures(none))
@@ -13,8 +13,10 @@ define fastcc i32 @test() {
 ; CHECK-NEXT:    br label %[[BB1:.*]]
 ; CHECK:       [[BB1]]:
 ; CHECK-NEXT:    [[A:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    call void @llvm.lifetime.start.p0(ptr [[A]])
 ; CHECK-NEXT:    br i1 false, label %[[BB2:.*]], label %[[BB1]]
 ; CHECK:       [[BB2]]:
+; CHECK-NEXT:    call void @llvm.lifetime.end.p0(ptr [[A]])
 ; CHECK-NEXT:    ret i32 0
 ;
   br label %1
diff --git a/llvm/test/Transforms/LoopUnroll/unroll-alloca.ll b/llvm/test/Transforms/LoopUnroll/unroll-alloca.ll
new file mode 100644
index 0000000000000..10e8238a510fd
--- /dev/null
+++ b/llvm/test/Transforms/LoopUnroll/unroll-alloca.ll
@@ -0,0 +1,59 @@
+; RUN: opt -unroll-allow-partial -unroll-runtime -passes="loop-unroll<O3>" -S < %s | FileCheck %s
+
+; Various scenarios where a loop has an alloca with a live-out lifetime use.
+; LCSSA can't help here because a lifetime marker can't have a phi
+; definition.
+; Remove the lifetime marker(s) before unrolling, to prevent multiple defs
+; and a single use.
+
+; CHECK-LABEL: @peelit
+; CHECK: alloca i32
+; CHECK-NOT: call{{.*}}@llvm.lifetime
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
+declare void @llvm.lifetime.start.p0(ptr captures(none)) #0
+declare void @llvm.lifetime.end.p0(ptr captures(none)) #0
+
+define fastcc i32 @peelit() {
+bb:
+  br label %bb1
+
+bb1:                                              ; preds = %bb1, %bb
+  %phi = phi i64 [ 1, %bb ], [ 0, %bb1 ]
+  %alloca = alloca i32, align 4
+  br i1 false, label %bb2, label %bb1
+
+bb2:                                              ; preds = %bb1
+  call void @llvm.lifetime.start.p0(ptr %alloca)
+  unreachable
+}
+
+; CHECK-LABEL: @partial
+; CHECK: call{{.*}}umax
+; CHECK: alloca i32
+; CHECK-NOT: call{{.*}}@llvm.lifetime.end
+
+define fastcc i32 @partial(i32 %max) {
+bb:
+  br label %bb1
+
+bb1:                                              ; preds = %bb1, %bb
+  %phic = phi i32 [ 0, %bb ], [ %next, %bb1 ]
+  %alloca = alloca i32, align 8
+  call void @llvm.lifetime.start.p0(ptr %alloca)
+  store i32 %phic, ptr %alloca, align 8
+  %next = add i32 %phic, 1
+  %cmp = icmp ult i32 %next, %max
+  br i1 %cmp, label %bb1, label %bb2
+
+bb2:                                              ; preds = %bb1
+  call void @llvm.lifetime.end.p0(ptr %alloca)
+  ret i32 %phic
+}
+
+; full unroll is OK because there's no merge point.
+
+attributes #0 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
diff --git a/llvm/test/Transforms/LoopUnrollAndJam/unroll-and-jam-alloca.ll b/llvm/test/Transforms/LoopUnrollAndJam/unroll-and-jam-alloca.ll
new file mode 100644
index 0000000000000..deda42b735971
--- /dev/null
+++ b/llvm/test/Transforms/LoopUnrollAndJam/unroll-and-jam-alloca.ll
@@ -0,0 +1,36 @@
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+; RUN: opt -passes=loop-unroll-and-jam -allow-unroll-and-jam -unroll-and-jam-count=4 -unroll-remainder < %s -S | FileCheck %s
+
+; The alloca and lifetime use are problematic here, because the unroll
+; duplication will require a merge. A lifetime intrinsic can't have a
+; phi definition. The lifetime intrinsic can be removed.
+
+; CHECK: [[xtraiter:%.+]] = and i32 {{.*}}, 3
+; CHECK-NOT: call{{.*}}@llvm.lifetime
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
+declare void @llvm.lifetime.start.p0(ptr captures(none)) #0
+
+define void @test1(i32 %I) {
+entry:
+  br label %for.outer
+
+for.outer:                                        ; preds = %for.latch, %entry
+  %i = phi i32 [ %add8, %for.latch ], [ 0, %entry ]
+  br label %for.inner
+
+for.inner:                                        ; preds = %for.inner, %for.outer
+  %alloca = alloca i32, align 4
+  br i1 true, label %for.latch, label %for.inner
+
+for.latch:                                        ; preds = %for.inner
+  %add8 = add i32 %i, 1
+  %exitcond25 = icmp eq i32 %i, %I
+  br i1 %exitcond25, label %for.end.loopexit, label %for.outer
+
+for.end.loopexit:                                 ; preds = %for.latch
+  call void @llvm.lifetime.start.p0(ptr %alloca)
+  ret void
+}
+
+attributes #0 = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }

>From db38aa81df9889ac145d5a5abd4c1d63b449554d Mon Sep 17 00:00:00 2001
From: Chang-Sun Lin Jr <chang-sun.lin.jr at intel.com>
Date: Wed, 14 Jan 2026 23:33:45 -0800
Subject: [PATCH 3/5] Fix up vectorization pass also.

---
 .../Transforms/Vectorize/LoopVectorize.cpp    |  5 ++
 .../LoopVectorize/alloca-lifetime.ll          | 59 +++++++++++++++++++
 2 files changed, 64 insertions(+)
 create mode 100644 llvm/test/Transforms/LoopVectorize/alloca-lifetime.ll

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 3f1e12e5d1cd0..8451d446f60d5 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -10183,6 +10183,11 @@ LoopVectorizeResult LoopVectorizePass::runImpl(Function &F) {
     // transform.
     Changed |= formLCSSARecursively(*L, *DT, LI, SE);
 
+    // Remove problematic lifetime intrinsics outside the loop.
+    // Avoids the need to merge allocas with a phi.
+    if (cleanupDanglingLifetimeUsers(L, *DT))
+      LLVM_DEBUG(dbgs() << "Vectorizer: removed dangling lifetime users.\n");
+
     Changed |= CFGChanged |= processLoop(L);
 
     if (Changed) {
diff --git a/llvm/test/Transforms/LoopVectorize/alloca-lifetime.ll b/llvm/test/Transforms/LoopVectorize/alloca-lifetime.ll
new file mode 100644
index 0000000000000..630ef9723ac68
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/alloca-lifetime.ll
@@ -0,0 +1,59 @@
+; RUN: opt < %s -passes=loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -S 2>&1 | FileCheck %s
+
+; The alloca is live-out of the loop. The lifetime intrinsics can be removed
+; before vectorization, to avoid SSA violations.
+
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+
+declare void @llvm.lifetime.start.p0(ptr nocapture) nounwind
+declare void @llvm.lifetime.end.p0(ptr nocapture) nounwind
+
+; CHECK-LABEL: @live_alloca
+; CHECK: 4 x i32
+; CHECK-NOT: call {{.*}} @llvm.lifetime.end
+define i32 @live_alloca(ptr %a, i64 %n) {
+entry:
+  br label %for.body
+
+for.body:
+  %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
+  %r = phi i32 [ %tmp3, %for.body ], [ 0, %entry ]
+  %alloca = alloca i32, align 4
+  call void @llvm.lifetime.start.p0(ptr %alloca)
+  %i.next = add nuw nsw i64 %i, 1
+  %cond = icmp slt i64 %i.next, %n
+  %tmp0 = select i1 %cond, i64 %i.next, i64 0
+  %tmp1 = getelementptr inbounds i32, ptr %a, i64 %tmp0
+  %tmp2 = load i32, ptr %tmp1, align 8
+  %tmp3 = add i32 %r, %tmp2
+  br i1 %cond, label %for.body, label %for.end
+
+for.end:
+  %tmp4 = phi i32 [ %tmp3, %for.body ]
+  call void @llvm.lifetime.end.p0(ptr %alloca)
+  ret i32 %tmp4
+}
+
+; CHECK-LABEL: @live_alloca2
+; CHECK: alloca i32
+; CHECK-NOT: call {{.*}} @llvm.lifetime.end
+define void @live_alloca2(ptr %ptr) {
+entry:
+  br label %loop
+
+loop:                       ; preds = %loop, %entry
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+  %alloca = alloca i32, align 4
+  call void @llvm.lifetime.start.p0(ptr %alloca)
+  %cond0 = icmp ult i64 %iv, 13
+  %s = select i1 %cond0, i32 10, i32 20
+  %gep = getelementptr inbounds i32, ptr %ptr, i64 %iv
+  store i32 %s, ptr %gep
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 14
+  br i1 %exitcond, label %exit, label %loop
+
+exit:
+  call void @llvm.lifetime.end.p0(ptr %alloca)
+  ret void
+}

>From 257cf487e01fc9fdeb66fb30e80bede753cec24f Mon Sep 17 00:00:00 2001
From: Chang-Sun Lin Jr <chang-sun.lin.jr at intel.com>
Date: Tue, 20 Jan 2026 15:20:12 -0800
Subject: [PATCH 4/5] Remove all associated lifetime markers when removing one
 marker.

---
 .../include/llvm/Transforms/Utils/LoopUtils.h |  3 +-
 llvm/lib/Transforms/Utils/LCSSA.cpp           | 36 +++++++++----------
 .../Transforms/LoopUnroll/unroll-alloca.ll    |  2 +-
 .../LoopVectorize/alloca-lifetime.ll          |  7 ++--
 4 files changed, 24 insertions(+), 24 deletions(-)

diff --git a/llvm/include/llvm/Transforms/Utils/LoopUtils.h b/llvm/include/llvm/Transforms/Utils/LoopUtils.h
index de6c69e177086..bffdc104be709 100644
--- a/llvm/include/llvm/Transforms/Utils/LoopUtils.h
+++ b/llvm/include/llvm/Transforms/Utils/LoopUtils.h
@@ -66,7 +66,8 @@ LLVM_ABI bool formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI,
                                       bool PreserveLCSSA);
 
 /// Ensure strict LCSSA form for the given loop, by removing lifetime
-/// intrinsics that are used outside the loop.
+/// intrinsics that cross the exit boundary, as well as their associated
+/// partners.
 ///
 /// Returns true if any modifications are made.
 LLVM_ABI bool cleanupDanglingLifetimeUsers(Loop *L, const DominatorTree &DT);
diff --git a/llvm/lib/Transforms/Utils/LCSSA.cpp b/llvm/lib/Transforms/Utils/LCSSA.cpp
index eb543c6dfcce2..d047da150f172 100644
--- a/llvm/lib/Transforms/Utils/LCSSA.cpp
+++ b/llvm/lib/Transforms/Utils/LCSSA.cpp
@@ -368,7 +368,8 @@ static void computeBlocksDominatingExits(
 }
 
 /// Ensure strict LCSSA form for the given loop, by removing lifetime
-/// intrinsics that are used outside the loop.
+/// intrinsics that cross the exit boundary, as well as their associated
+/// partners.
 ///
 /// Returns true if any modifications are made.
 bool llvm::cleanupDanglingLifetimeUsers(Loop *L, const DominatorTree &DT) {
@@ -378,35 +379,34 @@ bool llvm::cleanupDanglingLifetimeUsers(Loop *L, const DominatorTree &DT) {
   if (ExitBlocks.empty())
     return false;
 
-  // Look only at allocas that dominate the loop exits.
+  // Look only at allocas in the loop, that dominate the loop exits.
   SmallSetVector<BasicBlock *, 8> BlocksDominatingExits;
   computeBlocksDominatingExits(*L, DT, ExitBlocks, BlocksDominatingExits);
 
-  SmallVector<Instruction *, 8> ToRemove;
-  bool Changed = false;
+  SmallVector<AllocaInst *, 8> ToClean;
 
   for (auto *BB : BlocksDominatingExits) {
     for (auto &I : *BB) {
       if (auto *AI = dyn_cast<AllocaInst>(&I)) {
-        for (Use &U : AI->uses()) {
-          auto *User = cast<Instruction>(U.getUser());
-
-          if (L->contains(User->getParent()))
-            continue;
-
-          if (User->isLifetimeStartOrEnd())
-            ToRemove.push_back(User);
-        }
+        bool LifetimeOutsideLoop = llvm::any_of(AI->users(), [&](User *U) {
+          auto *Inst = cast<Instruction>(U);
+          return Inst->isLifetimeStartOrEnd() &&
+                 !L->contains(Inst->getParent());
+        });
+
+        if (LifetimeOutsideLoop)
+          ToClean.push_back(AI);
       }
     }
   }
 
-  for (Instruction *I : ToRemove) {
-    I->eraseFromParent();
-    Changed = true;
-  }
+  for (auto *AI : ToClean)
+    for (auto *U : make_early_inc_range(AI->users()))
+      if (auto *II = dyn_cast<IntrinsicInst>(U))
+        if (II->isLifetimeStartOrEnd())
+          II->eraseFromParent();
 
-  return Changed;
+  return !ToClean.empty();
 }
 
 static bool formLCSSAImpl(Loop &L, const DominatorTree &DT, const LoopInfo *LI,
diff --git a/llvm/test/Transforms/LoopUnroll/unroll-alloca.ll b/llvm/test/Transforms/LoopUnroll/unroll-alloca.ll
index 10e8238a510fd..db63983e9b598 100644
--- a/llvm/test/Transforms/LoopUnroll/unroll-alloca.ll
+++ b/llvm/test/Transforms/LoopUnroll/unroll-alloca.ll
@@ -34,7 +34,7 @@ bb2:                                              ; preds = %bb1
 ; CHECK-LABEL: @partial
 ; CHECK: call{{.*}}umax
 ; CHECK: alloca i32
-; CHECK-NOT: call{{.*}}@llvm.lifetime.end
+; CHECK-NOT: call{{.*}}@llvm.lifetime
 
 define fastcc i32 @partial(i32 %max) {
 bb:
diff --git a/llvm/test/Transforms/LoopVectorize/alloca-lifetime.ll b/llvm/test/Transforms/LoopVectorize/alloca-lifetime.ll
index 630ef9723ac68..c5e19d1ac279d 100644
--- a/llvm/test/Transforms/LoopVectorize/alloca-lifetime.ll
+++ b/llvm/test/Transforms/LoopVectorize/alloca-lifetime.ll
@@ -9,8 +9,8 @@ declare void @llvm.lifetime.start.p0(ptr nocapture) nounwind
 declare void @llvm.lifetime.end.p0(ptr nocapture) nounwind
 
 ; CHECK-LABEL: @live_alloca
-; CHECK: 4 x i32
-; CHECK-NOT: call {{.*}} @llvm.lifetime.end
+; CHECK: 4 x i64
+; CHECK-NOT: call {{.*}} @llvm.lifetime
 define i32 @live_alloca(ptr %a, i64 %n) {
 entry:
   br label %for.body
@@ -35,8 +35,7 @@ for.end:
 }
 
 ; CHECK-LABEL: @live_alloca2
-; CHECK: alloca i32
-; CHECK-NOT: call {{.*}} @llvm.lifetime.end
+; CHECK-NOT: call {{.*}} @llvm.lifetime
 define void @live_alloca2(ptr %ptr) {
 entry:
   br label %loop

>From 687d71ba5a935957a7402f7cc2b004440853e991 Mon Sep 17 00:00:00 2001
From: Chang-Sun Lin Jr <chang-sun.lin.jr at intel.com>
Date: Tue, 10 Mar 2026 15:36:31 -0700
Subject: [PATCH 5/5] Fix potential failure: the lifetime intrinsics should
 only be removed if LoopUnroll will actually do something (and tell the PM
 that analyses are invalidated). Lifetime intrinsics affect MSSA.

---
 llvm/lib/Transforms/Utils/LoopUnroll.cpp | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/llvm/lib/Transforms/Utils/LoopUnroll.cpp
index 4a429cf8bf8ed..fef5d5151538e 100644
--- a/llvm/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUnroll.cpp
@@ -555,13 +555,6 @@ llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
   if (CompletelyUnroll)
     ULO.Runtime = false;
 
-  // LCSSA allows lifetime intrinsics and tokens to directly use loop
-  // instructions, as they cannot use a phi.
-  // Cloning loop blocks requires a phi join; just remove the problematic
-  // instructions.
-  if (cleanupDanglingLifetimeUsers(L, *DT))
-    LLVM_DEBUG(dbgs() << "Unroll: removed dangling lifetime users.\n");
-
   // Go through all exits of L and see if there are any phi-nodes there. We just
   // conservatively assume that they're inserted to preserve LCSSA form, which
   // means that complete unrolling might break this form. We need to either fix
@@ -612,6 +605,13 @@ llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
     }
   }
 
+  // LCSSA allows lifetime intrinsics and tokens to directly use loop
+  // instructions, as they cannot use a phi.
+  // Cloning loop blocks requires a phi join; just remove the problematic
+  // instructions.
+  if (cleanupDanglingLifetimeUsers(L, *DT))
+    LLVM_DEBUG(dbgs() << "Unroll: removed dangling lifetime users.\n");
+
   using namespace ore;
   // Report the unrolling decision.
   if (CompletelyUnroll) {



More information about the llvm-commits mailing list