[llvm] Reland [BasicBlockUtils] Handle funclets when detaching EH pad blocks (PR #159379)
Gábor Spaits via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 18 12:27:45 PDT 2025
https://github.com/spaits updated https://github.com/llvm/llvm-project/pull/159379
>From 3bce2c0771d94e4f7aae1d194cbec261b7c42864 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?G=C3=A1bor=20Spaits?= <gaborspaits1 at gmail.com>
Date: Thu, 11 Sep 2025 22:52:16 +0200
Subject: [PATCH 1/2] Reland [BasicBlockUtils] Handle funclets when detaching
EH pad blocks (#157363)
Fixes #148052 .
When removing EH Pad blocks, the value defined by them becomes poison. These poison values are then used by `catchret` and `cleanupret`, which is invalid. This commit replaces those unreachable `catchret` and `cleanupret` instructions with `unreachable`.
---
llvm/lib/Transforms/Utils/BasicBlockUtils.cpp | 93 ++++---
.../unreachable-multi-basic-block-funclet.ll | 236 ++++++++++++++++++
2 files changed, 301 insertions(+), 28 deletions(-)
create mode 100644 llvm/test/Transforms/SimplifyCFG/unreachable-multi-basic-block-funclet.ll
diff --git a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
index cad0b4c12b54e..1dd8cb4ee584c 100644
--- a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -58,37 +58,74 @@ static cl::opt<unsigned> MaxDeoptOrUnreachableSuccessorCheckDepth(
"is followed by a block that either has a terminating "
"deoptimizing call or is terminated with an unreachable"));
-void llvm::detachDeadBlocks(
- ArrayRef<BasicBlock *> BBs,
- SmallVectorImpl<DominatorTree::UpdateType> *Updates,
- bool KeepOneInputPHIs) {
+/// Zap all the instructions in the block and replace them with an unreachable
+/// instruction and notify the basic block's successors that one of their
+/// predecessors is going away.
+static void
+emptyAndDetachBlock(BasicBlock *BB,
+ SmallVectorImpl<DominatorTree::UpdateType> *Updates,
+ bool KeepOneInputPHIs) {
+ // Loop through all of our successors and make sure they know that one
+ // of their predecessors is going away.
+ SmallPtrSet<BasicBlock *, 4> UniqueSuccessors;
+ for (BasicBlock *Succ : successors(BB)) {
+ Succ->removePredecessor(BB, KeepOneInputPHIs);
+ if (Updates && UniqueSuccessors.insert(Succ).second)
+ Updates->push_back({DominatorTree::Delete, BB, Succ});
+ }
+
+ // Zap all the instructions in the block.
+ while (!BB->empty()) {
+ Instruction &I = BB->back();
+ // If this instruction is used, replace uses with an arbitrary value.
+ // Because control flow can't get here, we don't care what we replace the
+ // value with. Note that since this block is unreachable, and all values
+ // contained within it must dominate their uses, that all uses will
+ // eventually be removed (they are themselves dead).
+ if (!I.use_empty())
+ I.replaceAllUsesWith(PoisonValue::get(I.getType()));
+ BB->back().eraseFromParent();
+ }
+ new UnreachableInst(BB->getContext(), BB);
+ assert(BB->size() == 1 && isa<UnreachableInst>(BB->getTerminator()) &&
+ "The successor list of BB isn't empty before "
+ "applying corresponding DTU updates.");
+}
+
+void llvm::detachDeadBlocks(ArrayRef<BasicBlock *> BBs,
+ SmallVectorImpl<DominatorTree::UpdateType> *Updates,
+ bool KeepOneInputPHIs) {
for (auto *BB : BBs) {
- // Loop through all of our successors and make sure they know that one
- // of their predecessors is going away.
- SmallPtrSet<BasicBlock *, 4> UniqueSuccessors;
- for (BasicBlock *Succ : successors(BB)) {
- Succ->removePredecessor(BB, KeepOneInputPHIs);
- if (Updates && UniqueSuccessors.insert(Succ).second)
- Updates->push_back({DominatorTree::Delete, BB, Succ});
+ auto NonFirstPhiIt = BB->getFirstNonPHIIt();
+ if (NonFirstPhiIt != BB->end()) {
+ Instruction &I = *NonFirstPhiIt;
+ // Exception handling funclets need to be explicitly addressed.
+ // These funclets must begin with cleanuppad or catchpad and end with
+ // cleanupred or catchret. The return instructions can be in different
+ // basic blocks than the pad instruction. If we would only delete the
+ // first block, the we would have possible cleanupret and catchret
+ // instructions with poison arguments, which wouldn't be valid.
+ if (isa<FuncletPadInst>(I)) {
+ SmallPtrSet<BasicBlock *, 4> UniqueEHRetBlocksToDelete;
+ for (User *User : I.users()) {
+ Instruction *ReturnInstr = dyn_cast<Instruction>(User);
+ // If we have a cleanupret or catchret block, replace it with just an
+ // unreachable. The other alternative, that may use a catchpad is a
+ // catchswitch. That does not need special handling for now.
+ if (isa<CatchReturnInst>(ReturnInstr) ||
+ isa<CleanupReturnInst>(ReturnInstr)) {
+ BasicBlock *ReturnInstrBB = ReturnInstr->getParent();
+ UniqueEHRetBlocksToDelete.insert(ReturnInstrBB);
+ }
+ }
+ for (BasicBlock *EHRetBB :
+ make_early_inc_range(UniqueEHRetBlocksToDelete))
+ emptyAndDetachBlock(EHRetBB, Updates, KeepOneInputPHIs);
+ }
}
- // Zap all the instructions in the block.
- while (!BB->empty()) {
- Instruction &I = BB->back();
- // If this instruction is used, replace uses with an arbitrary value.
- // Because control flow can't get here, we don't care what we replace the
- // value with. Note that since this block is unreachable, and all values
- // contained within it must dominate their uses, that all uses will
- // eventually be removed (they are themselves dead).
- if (!I.use_empty())
- I.replaceAllUsesWith(PoisonValue::get(I.getType()));
- BB->back().eraseFromParent();
- }
- new UnreachableInst(BB->getContext(), BB);
- assert(BB->size() == 1 &&
- isa<UnreachableInst>(BB->getTerminator()) &&
- "The successor list of BB isn't empty before "
- "applying corresponding DTU updates.");
+ // Detaching and emptying the current basic block.
+ emptyAndDetachBlock(BB, Updates, KeepOneInputPHIs);
}
}
diff --git a/llvm/test/Transforms/SimplifyCFG/unreachable-multi-basic-block-funclet.ll b/llvm/test/Transforms/SimplifyCFG/unreachable-multi-basic-block-funclet.ll
new file mode 100644
index 0000000000000..0f0fc78ec7add
--- /dev/null
+++ b/llvm/test/Transforms/SimplifyCFG/unreachable-multi-basic-block-funclet.ll
@@ -0,0 +1,236 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -passes=simplifycfg -S < %s | FileCheck %s
+
+; cleanuppad/cleanupret
+
+define void @unreachable_cleanuppad_linear(i64 %shapes.1) personality ptr null {
+; CHECK-LABEL: define void @unreachable_cleanuppad_linear(
+; CHECK-SAME: i64 [[SHAPES_1:%.*]]) personality ptr null {
+; CHECK-NEXT: [[START:.*:]]
+; CHECK-NEXT: [[_7:%.*]] = icmp ult i64 0, [[SHAPES_1]]
+; CHECK-NEXT: ret void
+;
+start:
+ %_7 = icmp ult i64 0, %shapes.1
+ ret void
+
+funclet:
+ %cleanuppad = cleanuppad within none []
+ br label %funclet_end
+
+funclet_end:
+ cleanupret from %cleanuppad unwind to caller
+}
+
+define void @unreachable_cleanuppad_linear_middle_block(i64 %shapes.1) personality ptr null {
+; CHECK-LABEL: define void @unreachable_cleanuppad_linear_middle_block(
+; CHECK-SAME: i64 [[SHAPES_1:%.*]]) personality ptr null {
+; CHECK-NEXT: [[START:.*:]]
+; CHECK-NEXT: [[_7:%.*]] = icmp ult i64 0, [[SHAPES_1]]
+; CHECK-NEXT: ret void
+;
+start:
+ %_7 = icmp ult i64 0, %shapes.1
+ ret void
+
+funclet:
+ %cleanuppad = cleanuppad within none []
+ br label %middle_block
+
+middle_block:
+ %tmp1 = add i64 %shapes.1, 42
+ br label %funclet_end
+
+funclet_end:
+ cleanupret from %cleanuppad unwind to caller
+}
+
+define void @unreachable_cleanuppad_multiple_predecessors(i64 %shapes.1) personality ptr null {
+; CHECK-LABEL: define void @unreachable_cleanuppad_multiple_predecessors(
+; CHECK-SAME: i64 [[SHAPES_1:%.*]]) personality ptr null {
+; CHECK-NEXT: [[START:.*:]]
+; CHECK-NEXT: [[_7:%.*]] = icmp ult i64 0, [[SHAPES_1]]
+; CHECK-NEXT: ret void
+;
+start:
+ %_7 = icmp ult i64 0, %shapes.1
+ ret void
+
+funclet:
+ %cleanuppad = cleanuppad within none []
+ switch i64 %shapes.1, label %otherwise [ i64 0, label %one
+ i64 1, label %two
+ i64 42, label %three ]
+one:
+ br label %funclet_end
+
+two:
+ br label %funclet_end
+
+three:
+ br label %funclet_end
+
+otherwise:
+ br label %funclet_end
+
+funclet_end:
+ cleanupret from %cleanuppad unwind to caller
+}
+
+; catchpad/catchret
+
+define void @unreachable_catchpad_linear(i64 %shapes.1) personality ptr null {
+; CHECK-LABEL: define void @unreachable_catchpad_linear(
+; CHECK-SAME: i64 [[SHAPES_1:%.*]]) personality ptr null {
+; CHECK-NEXT: [[START:.*:]]
+; CHECK-NEXT: [[_7:%.*]] = icmp ult i64 0, [[SHAPES_1]]
+; CHECK-NEXT: ret void
+;
+start:
+ %_7 = icmp ult i64 0, %shapes.1
+ ret void
+
+dispatch:
+ %cs = catchswitch within none [label %funclet] unwind to caller
+
+funclet:
+ %cleanuppad = catchpad within %cs []
+ br label %funclet_end
+
+
+funclet_end:
+ catchret from %cleanuppad to label %unreachable
+
+unreachable:
+ unreachable
+}
+
+define void @unreachable_catchpad_multiple_predecessors(i64 %shapes.1) personality ptr null {
+; CHECK-LABEL: define void @unreachable_catchpad_multiple_predecessors(
+; CHECK-SAME: i64 [[SHAPES_1:%.*]]) personality ptr null {
+; CHECK-NEXT: [[START:.*:]]
+; CHECK-NEXT: [[_7:%.*]] = icmp ult i64 0, [[SHAPES_1]]
+; CHECK-NEXT: ret void
+;
+start:
+ %_7 = icmp ult i64 0, %shapes.1
+ ret void
+
+dispatch:
+ %cs = catchswitch within none [label %funclet] unwind to caller
+
+funclet:
+ %cleanuppad = catchpad within %cs []
+ switch i64 %shapes.1, label %otherwise [ i64 0, label %one
+ i64 1, label %two
+ i64 42, label %three ]
+one:
+ br label %funclet_end
+
+two:
+ br label %funclet_end
+
+three:
+ br label %funclet_end
+
+otherwise:
+ br label %funclet_end
+
+funclet_end:
+ catchret from %cleanuppad to label %unreachable
+
+unreachable:
+ unreachable
+}
+
+; Issue reproducer
+
+define void @gh148052(i64 %shapes.1) personality ptr null {
+; CHECK-LABEL: define void @gh148052(
+; CHECK-SAME: i64 [[SHAPES_1:%.*]]) personality ptr null {
+; CHECK-NEXT: [[START:.*:]]
+; CHECK-NEXT: [[_7:%.*]] = icmp ult i64 0, [[SHAPES_1]]
+; CHECK-NEXT: call void @llvm.assume(i1 [[_7]])
+; CHECK-NEXT: ret void
+;
+start:
+ %_7 = icmp ult i64 0, %shapes.1
+ br i1 %_7, label %bb1, label %panic
+
+bb1:
+ %_11 = icmp ult i64 0, %shapes.1
+ br i1 %_11, label %bb3, label %panic1
+
+panic:
+ unreachable
+
+bb3:
+ ret void
+
+panic1:
+ invoke void @func(i64 0, i64 0, ptr null)
+ to label %unreachable unwind label %funclet_bb14
+
+funclet_bb14:
+ %cleanuppad = cleanuppad within none []
+ br label %bb13
+
+unreachable:
+ unreachable
+
+bb10:
+ cleanupret from %cleanuppad5 unwind to caller
+
+funclet_bb10:
+ %cleanuppad5 = cleanuppad within none []
+ br label %bb10
+
+bb13:
+ cleanupret from %cleanuppad unwind label %funclet_bb10
+}
+
+%struct.foo = type { ptr, %struct.eggs, ptr }
+%struct.eggs = type { ptr, ptr, ptr }
+
+declare x86_thiscallcc ptr @quux(ptr, ptr, i32)
+
+define x86_thiscallcc ptr @baz(ptr %arg, ptr %arg1, ptr %arg2, i1 %arg3, ptr %arg4) personality ptr null {
+; CHECK-LABEL: define x86_thiscallcc ptr @baz(
+; CHECK-SAME: ptr [[ARG:%.*]], ptr [[ARG1:%.*]], ptr [[ARG2:%.*]], i1 [[ARG3:%.*]], ptr [[ARG4:%.*]]) personality ptr null {
+; CHECK-NEXT: [[BB:.*:]]
+; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [2 x %struct.foo], align 4
+; CHECK-NEXT: [[INVOKE:%.*]] = call x86_thiscallcc ptr @quux(ptr null, ptr null, i32 0) #[[ATTR1:[0-9]+]]
+; CHECK-NEXT: unreachable
+;
+bb:
+ %alloca = alloca [2 x %struct.foo], align 4
+ %invoke = invoke x86_thiscallcc ptr @quux(ptr null, ptr null, i32 0)
+ to label %bb5 unwind label %bb10
+
+bb5: ; preds = %bb
+ %getelementptr = getelementptr i8, ptr %arg, i32 20
+ %call = call x86_thiscallcc ptr null(ptr null, ptr null, i32 0)
+ br label %bb6
+
+bb6: ; preds = %bb10, %bb5
+ %phi = phi ptr [ null, %bb10 ], [ null, %bb5 ]
+ ret ptr %phi
+
+bb7: ; No predecessors!
+ %cleanuppad = cleanuppad within none []
+ %getelementptr8 = getelementptr i8, ptr %arg2, i32 -20
+ %icmp = icmp eq ptr %arg, null
+ br label %bb9
+
+bb9: ; preds = %bb7
+ cleanupret from %cleanuppad unwind label %bb10
+
+bb10: ; preds = %bb9, %bb
+ %phi11 = phi ptr [ %arg, %bb9 ], [ null, %bb ]
+ %cleanuppad12 = cleanuppad within none []
+ %getelementptr13 = getelementptr i8, ptr %phi11, i32 -20
+ store i32 0, ptr %phi11, align 4
+ br label %bb6
+}
+
+declare void @func(i64, i64, ptr)
>From 9d984b1874e1c5d190bf85d9402ad8d9ce2cb583 Mon Sep 17 00:00:00 2001
From: Gabor Spaits <gaborspaits1 at gmail.com>
Date: Thu, 18 Sep 2025 21:25:02 +0200
Subject: [PATCH 2/2] Don't use make_early_inc_range where not needed and move
UniqueSuccessors to an outer scope
---
llvm/lib/Transforms/Utils/BasicBlockUtils.cpp | 10 +++++++---
1 file changed, 7 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
index 1dd8cb4ee584c..8714741a08630 100644
--- a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -95,6 +95,7 @@ emptyAndDetachBlock(BasicBlock *BB,
void llvm::detachDeadBlocks(ArrayRef<BasicBlock *> BBs,
SmallVectorImpl<DominatorTree::UpdateType> *Updates,
bool KeepOneInputPHIs) {
+ SmallPtrSet<BasicBlock *, 4> UniqueEHRetBlocksToDelete;
for (auto *BB : BBs) {
auto NonFirstPhiIt = BB->getFirstNonPHIIt();
if (NonFirstPhiIt != BB->end()) {
@@ -106,7 +107,8 @@ void llvm::detachDeadBlocks(ArrayRef<BasicBlock *> BBs,
// first block, the we would have possible cleanupret and catchret
// instructions with poison arguments, which wouldn't be valid.
if (isa<FuncletPadInst>(I)) {
- SmallPtrSet<BasicBlock *, 4> UniqueEHRetBlocksToDelete;
+ UniqueEHRetBlocksToDelete.clear();
+
for (User *User : I.users()) {
Instruction *ReturnInstr = dyn_cast<Instruction>(User);
// If we have a cleanupret or catchret block, replace it with just an
@@ -118,12 +120,14 @@ void llvm::detachDeadBlocks(ArrayRef<BasicBlock *> BBs,
UniqueEHRetBlocksToDelete.insert(ReturnInstrBB);
}
}
- for (BasicBlock *EHRetBB :
- make_early_inc_range(UniqueEHRetBlocksToDelete))
+
+ for (BasicBlock *EHRetBB : UniqueEHRetBlocksToDelete)
emptyAndDetachBlock(EHRetBB, Updates, KeepOneInputPHIs);
}
}
+ UniqueEHRetBlocksToDelete.clear();
+
// Detaching and emptying the current basic block.
emptyAndDetachBlock(BB, Updates, KeepOneInputPHIs);
}
More information about the llvm-commits
mailing list