[llvm] f9c2565 - Reapply "[ControlHeightReduction] Drop lifetime annotations where necessary" (#160640)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 25 06:55:30 PDT 2025
Author: Aiden Grossman
Date: 2025-09-25T13:55:27Z
New Revision: f9c2565117106467c3c8b1975e67c9750aee34e3
URL: https://github.com/llvm/llvm-project/commit/f9c2565117106467c3c8b1975e67c9750aee34e3
DIFF: https://github.com/llvm/llvm-project/commit/f9c2565117106467c3c8b1975e67c9750aee34e3.diff
LOG: Reapply "[ControlHeightReduction] Drop lifetime annotations where necessary" (#160640)
Reapplies #159686
This reverts commit 4f33d7b7a9f39d733b7572f9afbf178bca8da127.
The original landing of this patch had an issue where it would try to
hoist allocas into the entry block that were already in the entry block.
This would end up actually moving them lower in the block, potentially
after their users, resulting in invalid IR.
This update fixes the issue by ensuring that we only hoist static
allocas that have been sunk into a split basic block. A regression test
has been added.
Integration tested using a three-stage build of clang with IRPGO
enabled.
Added:
llvm/test/Transforms/PGOProfile/chr-lifetimes.ll
Modified:
llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp b/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp
index c14bbecf0d4e1..7c78eb35a865a 100644
--- a/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp
+++ b/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp
@@ -1591,7 +1591,16 @@ static void insertTrivialPHIs(CHRScope *Scope,
}
TrivialPHIs.insert(PN);
CHR_DEBUG(dbgs() << "Insert phi " << *PN << "\n");
+ bool FoundLifetimeAnnotation = false;
for (Instruction *UI : Users) {
+ // If we found a lifetime annotation, remove it, but set a flag
+ // to ensure that we remove all other lifetime annotations attached
+ // to the alloca.
+ if (UI->isLifetimeStartOrEnd()) {
+ UI->eraseFromParent();
+ FoundLifetimeAnnotation = true;
+ continue;
+ }
for (unsigned J = 0, NumOps = UI->getNumOperands(); J < NumOps; ++J) {
if (UI->getOperand(J) == &I) {
UI->setOperand(J, PN);
@@ -1599,6 +1608,14 @@ static void insertTrivialPHIs(CHRScope *Scope,
}
CHR_DEBUG(dbgs() << "Updated user " << *UI << "\n");
}
+ // Erase any leftover lifetime annotations for a dynamic alloca.
+ if (FoundLifetimeAnnotation) {
+ for (User *U : make_early_inc_range(I.users())) {
+ if (auto *UI = dyn_cast<Instruction>(U))
+ if (UI->isLifetimeStartOrEnd())
+ UI->eraseFromParent();
+ }
+ }
}
}
}
@@ -1693,14 +1710,12 @@ void CHR::transformScopes(CHRScope *Scope, DenseSet<PHINode *> &TrivialPHIs) {
BasicBlock *ExitBlock = LastRegion->getExit();
std::optional<uint64_t> ProfileCount = BFI.getBlockProfileCount(EntryBlock);
- if (ExitBlock) {
- // Insert a trivial phi at the exit block (where the CHR hot path and the
- // cold path merges) for a value that's defined in the scope but used
- // outside it (meaning it's alive at the exit block). We will add the
- // incoming values for the CHR cold paths to it below. Without this, we'd
- // miss updating phi's for such values unless there happens to already be a
- // phi for that value there.
- insertTrivialPHIs(Scope, EntryBlock, ExitBlock, TrivialPHIs);
+ SmallVector<AllocaInst *> StaticAllocas;
+ for (Instruction &I : *EntryBlock) {
+ if (auto *AI = dyn_cast<AllocaInst>(&I)) {
+ if (AI->isStaticAlloca())
+ StaticAllocas.push_back(AI);
+ }
}
// Split the entry block of the first region. The new block becomes the new
@@ -1719,6 +1734,20 @@ void CHR::transformScopes(CHRScope *Scope, DenseSet<PHINode *> &TrivialPHIs) {
FirstRegion->replaceEntryRecursive(NewEntryBlock);
BasicBlock *PreEntryBlock = EntryBlock;
+ // Move static allocas into the pre-entry block so they stay static.
+ for (AllocaInst *AI : StaticAllocas)
+ AI->moveBefore(EntryBlock->begin());
+
+ if (ExitBlock) {
+ // Insert a trivial phi at the exit block (where the CHR hot path and the
+ // cold path merges) for a value that's defined in the scope but used
+ // outside it (meaning it's alive at the exit block). We will add the
+ // incoming values for the CHR cold paths to it below. Without this, we'd
+ // miss updating phi's for such values unless there happens to already be a
+ // phi for that value there.
+ insertTrivialPHIs(Scope, EntryBlock, ExitBlock, TrivialPHIs);
+ }
+
ValueToValueMapTy VMap;
// Clone the blocks in the scope (excluding the PreEntryBlock) to split into a
// hot path (originals) and a cold path (clones) and update the PHIs at the
diff --git a/llvm/test/Transforms/PGOProfile/chr-lifetimes.ll b/llvm/test/Transforms/PGOProfile/chr-lifetimes.ll
new file mode 100644
index 0000000000000..b29834f9fe960
--- /dev/null
+++ b/llvm/test/Transforms/PGOProfile/chr-lifetimes.ll
@@ -0,0 +1,245 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -passes='require<profile-summary>,chr' -S | FileCheck %s
+
+declare void @foo()
+declare void @bar()
+declare void @baz(i64)
+
+; Test that when we have a static alloca in an entry block that will get split,
+; the alloca remains static and we preserve its lifetime annotations.
+define void @test_chr_with_lifetimes(ptr %i) !prof !14 {
+; CHECK-LABEL: @test_chr_with_lifetimes(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TEST:%.*]] = alloca i32, align 8
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I:%.*]], align 4
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i32 [[TMP0]], 0
+; CHECK-NEXT: [[TMP9:%.*]] = freeze i1 [[TMP1]]
+; CHECK-NEXT: [[TMP10:%.*]] = select i1 true, i1 [[TMP9]], i1 false
+; CHECK-NEXT: [[TMP11:%.*]] = freeze i1 [[TMP1]]
+; CHECK-NEXT: [[TMP5:%.*]] = select i1 [[TMP10]], i1 [[TMP11]], i1 false
+; CHECK-NEXT: br i1 [[TMP5]], label [[ENTRY_SPLIT:%.*]], label [[ENTRY_SPLIT_NONCHR:%.*]], !prof [[PROF15:![0-9]+]]
+; CHECK: entry.split:
+; CHECK-NEXT: [[TMP6:%.*]] = select i1 true, i64 0, i64 4, !prof [[PROF16:![0-9]+]]
+; CHECK-NEXT: call void @baz(i64 [[TMP6]])
+; CHECK-NEXT: br i1 false, label [[BB1:%.*]], label [[BB0:%.*]], !prof [[PROF17:![0-9]+]]
+; CHECK: bb0:
+; CHECK-NEXT: call void @foo()
+; CHECK-NEXT: br label [[BB1]]
+; CHECK: entry.split.nonchr:
+; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP1]], i64 0, i64 4, !prof [[PROF16]]
+; CHECK-NEXT: call void @baz(i64 [[TMP7]])
+; CHECK-NEXT: br i1 [[TMP1]], label [[BB0_NONCHR:%.*]], label [[BB1]], !prof [[PROF16]]
+; CHECK: bb0.nonchr:
+; CHECK-NEXT: call void @foo()
+; CHECK-NEXT: br label [[BB1]]
+; CHECK: bb1:
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr [[TEST]])
+; CHECK-NEXT: store ptr [[TEST]], ptr [[I]], align 8
+; CHECK-NEXT: br label [[BB2:%.*]]
+; CHECK: bb2:
+; CHECK-NEXT: [[TMP2:%.*]] = phi ptr [ [[TMP3:%.*]], [[BB2]] ], [ null, [[BB1]] ]
+; CHECK-NEXT: [[TMP3]] = getelementptr i8, ptr [[TMP2]], i64 24
+; CHECK-NEXT: [[TMP4:%.*]] = icmp eq ptr [[TMP2]], [[I]]
+; CHECK-NEXT: br i1 [[TMP4]], label [[BB3:%.*]], label [[BB2]]
+; CHECK: bb3:
+; CHECK-NEXT: ret void
+;
+entry:
+ %1 = load i32, ptr %i
+ %2 = icmp eq i32 %1, 0
+ %3 = select i1 %2, i64 4, i64 0, !prof !15
+ %test = alloca i32, align 8
+ call void @baz(i64 %3)
+ br i1 %2, label %bb1, label %bb0, !prof !15
+
+bb0:
+ call void @foo()
+ br label %bb1
+
+bb1:
+ call void @llvm.lifetime.start.p0(ptr %test)
+ store ptr %test, ptr %i, align 8
+ br label %bb2
+
+bb2:
+ %4 = phi ptr [ %5, %bb2 ], [ null, %bb1 ]
+ %5 = getelementptr i8, ptr %4, i64 24
+ %6 = icmp eq ptr %4, %i
+ br i1 %6, label %bb3, label %bb2
+
+bb3:
+ ret void
+}
+
+; Test that we remove lifetime markers that would otherwise refer to phi
+; nodes given the dynamic allocas they referred to have been duplicated.
+define void @test_chr_dynamic_alloca(ptr %i) !prof !14 {
+; CHECK-LABEL: @test_chr_dynamic_alloca(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TEST1:%.*]] = load i32, ptr [[I:%.*]], align 4
+; CHECK-NEXT: [[TEST2:%.*]] = icmp eq i32 [[TEST1]], 5
+; CHECK-NEXT: br i1 [[TEST2]], label [[BB4:%.*]], label [[BB3:%.*]]
+; CHECK: bb4:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I]], align 4
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i32 [[TMP0]], 0
+; CHECK-NEXT: [[TMP2:%.*]] = freeze i1 [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = select i1 true, i1 [[TMP2]], i1 false
+; CHECK-NEXT: [[TMP4:%.*]] = freeze i1 [[TMP1]]
+; CHECK-NEXT: [[TMP5:%.*]] = select i1 [[TMP3]], i1 [[TMP4]], i1 false
+; CHECK-NEXT: br i1 [[TMP5]], label [[BB4_SPLIT:%.*]], label [[BB4_SPLIT_NONCHR:%.*]], !prof [[PROF15]]
+; CHECK: bb4.split:
+; CHECK-NEXT: [[TMP6:%.*]] = select i1 true, i64 0, i64 4, !prof [[PROF16]]
+; CHECK-NEXT: [[TEST:%.*]] = alloca i32, align 8
+; CHECK-NEXT: call void @baz(i64 [[TMP6]])
+; CHECK-NEXT: br i1 false, label [[BB1:%.*]], label [[BB0:%.*]], !prof [[PROF17]]
+; CHECK: bb0:
+; CHECK-NEXT: call void @foo()
+; CHECK-NEXT: store ptr [[TEST]], ptr [[I]], align 8
+; CHECK-NEXT: br label [[BB1]]
+; CHECK: bb4.split.nonchr:
+; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP1]], i64 0, i64 4, !prof [[PROF16]]
+; CHECK-NEXT: [[TEST_NONCHR:%.*]] = alloca i32, align 8
+; CHECK-NEXT: call void @baz(i64 [[TMP7]])
+; CHECK-NEXT: br i1 [[TMP1]], label [[BB0_NONCHR:%.*]], label [[BB1]], !prof [[PROF16]]
+; CHECK: bb0.nonchr:
+; CHECK-NEXT: call void @foo()
+; CHECK-NEXT: store ptr [[TEST_NONCHR]], ptr [[I]], align 8
+; CHECK-NEXT: br label [[BB1]]
+; CHECK: bb1:
+; CHECK-NEXT: [[TMP8:%.*]] = phi ptr [ [[TEST]], [[BB0]] ], [ [[TEST]], [[BB4_SPLIT]] ], [ [[TEST_NONCHR]], [[BB0_NONCHR]] ], [ [[TEST_NONCHR]], [[BB4_SPLIT_NONCHR]] ]
+; CHECK-NEXT: call void @bar()
+; CHECK-NEXT: store ptr [[TMP8]], ptr [[I]], align 8
+; CHECK-NEXT: br label [[BB2:%.*]]
+; CHECK: bb2:
+; CHECK-NEXT: [[TMP9:%.*]] = phi ptr [ [[TMP10:%.*]], [[BB2]] ], [ null, [[BB1]] ]
+; CHECK-NEXT: [[TMP10]] = getelementptr i8, ptr [[TMP9]], i64 24
+; CHECK-NEXT: [[TEST5:%.*]] = load ptr, ptr [[TMP8]], align 8
+; CHECK-NEXT: [[TMP11:%.*]] = icmp eq ptr [[TMP9]], [[TEST5]]
+; CHECK-NEXT: br i1 [[TMP11]], label [[BB3]], label [[BB2]]
+; CHECK: bb3:
+; CHECK-NEXT: ret void
+;
+entry:
+ %test1 = load i32, ptr %i
+ %test2 = icmp eq i32 %test1, 5
+ br i1 %test2, label %bb4, label %bb3
+
+bb4:
+ %1 = load i32, ptr %i
+ %2 = icmp eq i32 %1, 0
+ %3 = select i1 %2, i64 4, i64 0, !prof !15
+ %test = alloca i32, align 8
+ call void @baz(i64 %3)
+ br i1 %2, label %bb1, label %bb0, !prof !15
+
+bb0:
+ call void @foo()
+ call void @llvm.lifetime.start.p0(ptr %test)
+ store ptr %test, ptr %i, align 8
+ br label %bb1
+
+bb1:
+ call void @bar()
+ call void @llvm.lifetime.start.p0(ptr %test)
+ store ptr %test, ptr %i, align 8
+ br label %bb2
+
+bb2:
+ %4 = phi ptr [ %5, %bb2 ], [ null, %bb1 ]
+ %5 = getelementptr i8, ptr %4, i64 24
+ %test5 = load ptr, ptr %test
+ call void @llvm.lifetime.end.p0(ptr %test)
+ %6 = icmp eq ptr %4, %test5
+ br i1 %6, label %bb3, label %bb2
+
+bb3:
+ ret void
+}
+
+; Test that we do not move around allocas that occur in the entry block
+; before splitting. If we accidentally sink them, we can move them after
+; their users.
+define void @test_no_move_allocas(ptr %i) !prof !14 {
+; CHECK-LABEL: @test_no_move_allocas(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TEST:%.*]] = alloca i32, align 8
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr [[TEST]])
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I:%.*]], align 4
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i32 [[TMP0]], 0
+; CHECK-NEXT: [[TMP2:%.*]] = freeze i1 [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = select i1 true, i1 [[TMP2]], i1 false
+; CHECK-NEXT: [[TMP4:%.*]] = freeze i1 [[TMP1]]
+; CHECK-NEXT: [[TMP5:%.*]] = select i1 [[TMP3]], i1 [[TMP4]], i1 false
+; CHECK-NEXT: br i1 [[TMP5]], label [[ENTRY_SPLIT:%.*]], label [[ENTRY_SPLIT_NONCHR:%.*]], !prof [[PROF15]]
+; CHECK: entry.split:
+; CHECK-NEXT: [[TMP6:%.*]] = select i1 true, i64 0, i64 4, !prof [[PROF16]]
+; CHECK-NEXT: call void @baz(i64 [[TMP6]])
+; CHECK-NEXT: br i1 false, label [[BB1:%.*]], label [[BB0:%.*]], !prof [[PROF17]]
+; CHECK: bb0:
+; CHECK-NEXT: call void @foo()
+; CHECK-NEXT: br label [[BB1]]
+; CHECK: entry.split.nonchr:
+; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP1]], i64 0, i64 4, !prof [[PROF16]]
+; CHECK-NEXT: call void @baz(i64 [[TMP7]])
+; CHECK-NEXT: br i1 [[TMP1]], label [[BB0_NONCHR:%.*]], label [[BB1]], !prof [[PROF16]]
+; CHECK: bb0.nonchr:
+; CHECK-NEXT: call void @foo()
+; CHECK-NEXT: br label [[BB1]]
+; CHECK: bb1:
+; CHECK-NEXT: call void @bar()
+; CHECK-NEXT: br label [[BB2:%.*]]
+; CHECK: bb2:
+; CHECK-NEXT: [[TMP8:%.*]] = phi ptr [ [[TMP9:%.*]], [[BB2]] ], [ null, [[BB1]] ]
+; CHECK-NEXT: [[TMP9]] = getelementptr i8, ptr [[TMP8]], i64 24
+; CHECK-NEXT: [[TMP10:%.*]] = icmp eq ptr [[TMP8]], [[I]]
+; CHECK-NEXT: br i1 [[TMP10]], label [[BB3:%.*]], label [[BB2]]
+; CHECK: bb3:
+; CHECK-NEXT: ret void
+;
+entry:
+ %test = alloca i32, align 8
+ call void @llvm.lifetime.start.p0(ptr %test)
+ %1 = load i32, ptr %i
+ %2 = icmp eq i32 %1, 0
+ %3 = select i1 %2, i64 4, i64 0, !prof !15
+ call void @baz(i64 %3)
+ br i1 %2, label %bb1, label %bb0, !prof !15
+
+bb0:
+ call void @foo()
+ br label %bb1
+
+bb1:
+ call void @bar()
+ br label %bb2
+
+bb2:
+ %4 = phi ptr [ %5, %bb2 ], [ null, %bb1 ]
+ %5 = getelementptr i8, ptr %4, i64 24
+ %6 = icmp eq ptr %4, %i
+ br i1 %6, label %bb3, label %bb2
+
+bb3:
+ ret void
+}
+
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"ProfileSummary", !1}
+!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
+!2 = !{!"ProfileFormat", !"InstrProf"}
+!3 = !{!"TotalCount", i64 10000}
+!4 = !{!"MaxCount", i64 10}
+!5 = !{!"MaxInternalCount", i64 1}
+!6 = !{!"MaxFunctionCount", i64 1000}
+!7 = !{!"NumCounts", i64 3}
+!8 = !{!"NumFunctions", i64 3}
+!9 = !{!"DetailedSummary", !10}
+!10 = !{!11, !12, !13}
+!11 = !{i32 10000, i64 100, i32 1}
+!12 = !{i32 999000, i64 100, i32 1}
+!13 = !{i32 999999, i64 1, i32 2}
+
+!14 = !{!"function_entry_count", i64 100}
+!15 = !{!"branch_weights", i32 0, i32 1}
+; CHECK: !15 = !{!"branch_weights", i32 1000, i32 0}
More information about the llvm-commits
mailing list