[llvm] [BasicBlockUtils] Handle funclets when detaching EH pad blocks (PR #157363)

Gábor Spaits via llvm-commits llvm-commits at lists.llvm.org
Thu Sep 11 00:54:16 PDT 2025


https://github.com/spaits updated https://github.com/llvm/llvm-project/pull/157363

>From efade811fd44ee096ae102ab941394d1d3db7ca9 Mon Sep 17 00:00:00 2001
From: Gabor Spaits <gaborspaits1 at gmail.com>
Date: Tue, 9 Sep 2025 12:51:46 +0200
Subject: [PATCH 1/5] Replace unreachable cleanupret and catchret instruction
 with unreachable instruction

---
 llvm/lib/Transforms/Utils/BasicBlockUtils.cpp |  20 ++-
 .../unreachable-multi-basic-block-funclet.ll  | 169 ++++++++++++++++++
 2 files changed, 188 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/Transforms/SimplifyCFG/unreachable-multi-basic-block-funclet.ll

diff --git a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
index cad0b4c12b54e..3515374a58ca1 100644
--- a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -75,7 +75,25 @@ void llvm::detachDeadBlocks(
     // Zap all the instructions in the block.
     while (!BB->empty()) {
       Instruction &I = BB->back();
-      // If this instruction is used, replace uses with an arbitrary value.
+      // Exception handling funclets need to be explicitly addressed.
+      // These funclets must begin with cleanuppad or catchpad and end with
+      // cleanupred or catchret. The return instructions can be in different
+      // basic blocks than the pad instruction. If we would only delete the
+      // first block, the we would have possible cleanupret and catchret
+      // instructions with poison arguments, which wouldn't be valid.
+      unsigned OpCode = I.getOpcode();
+      if (OpCode == Instruction::CatchPad ||
+          OpCode == Instruction::CleanupPad) {
+        for (User *User : make_early_inc_range(I.users())) {
+          Instruction *ReturnInstr = dyn_cast<Instruction>(User);
+          if (isa<CatchReturnInst>(ReturnInstr) ||
+              isa<CleanupReturnInst>(ReturnInstr)) {
+            BasicBlock *ReturnInstrBB = ReturnInstr->getParent();
+            ReturnInstr->eraseFromParent();
+            new UnreachableInst(ReturnInstrBB->getContext(), ReturnInstrBB);
+          }
+        }
+      }
       // Because control flow can't get here, we don't care what we replace the
       // value with.  Note that since this block is unreachable, and all values
       // contained within it must dominate their uses, that all uses will
diff --git a/llvm/test/Transforms/SimplifyCFG/unreachable-multi-basic-block-funclet.ll b/llvm/test/Transforms/SimplifyCFG/unreachable-multi-basic-block-funclet.ll
new file mode 100644
index 0000000000000..d2fccae6770db
--- /dev/null
+++ b/llvm/test/Transforms/SimplifyCFG/unreachable-multi-basic-block-funclet.ll
@@ -0,0 +1,169 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -passes=simplifycfg -S < %s | FileCheck %s
+
+; cleanuppad/cleanupret
+
+define void @unreachable_cleanuppad_linear(i64 %shapes.1) personality ptr null {
+; CHECK-LABEL: define void @unreachable_cleanuppad_linear(
+; CHECK-SAME: i64 [[SHAPES_1:%.*]]) personality ptr null {
+; CHECK-NEXT:  [[START:.*:]]
+; CHECK-NEXT:    [[_7:%.*]] = icmp ult i64 0, [[SHAPES_1]]
+; CHECK-NEXT:    ret void
+;
+start:
+  %_7 = icmp ult i64 0, %shapes.1
+  ret void
+
+funclet:
+  %cleanuppad = cleanuppad within none []
+  br label %funclet_end
+
+funclet_end:
+  cleanupret from %cleanuppad unwind to caller
+}
+
+define void @unreachable_cleanuppad_multiple_predecessors(i64 %shapes.1) personality ptr null {
+; CHECK-LABEL: define void @unreachable_cleanuppad_multiple_predecessors(
+; CHECK-SAME: i64 [[SHAPES_1:%.*]]) personality ptr null {
+; CHECK-NEXT:  [[START:.*:]]
+; CHECK-NEXT:    [[_7:%.*]] = icmp ult i64 0, [[SHAPES_1]]
+; CHECK-NEXT:    ret void
+;
+start:
+  %_7 = icmp ult i64 0, %shapes.1
+  ret void
+
+funclet:
+  %cleanuppad = cleanuppad within none []
+  switch i64 %shapes.1, label %otherwise [ i64 0, label %one
+  i64 1, label %two
+  i64 42, label %three ]
+one:
+  br label %funclet_end
+
+two:
+  br label %funclet_end
+
+three:
+  br label %funclet_end
+
+otherwise:
+  br label %funclet_end
+
+funclet_end:
+  cleanupret from %cleanuppad unwind to caller
+}
+
+; catchpad/catchret
+
+define void @unreachable_catchpad_linear(i64 %shapes.1) personality ptr null {
+; CHECK-LABEL: define void @unreachable_catchpad_linear(
+; CHECK-SAME: i64 [[SHAPES_1:%.*]]) personality ptr null {
+; CHECK-NEXT:  [[START:.*:]]
+; CHECK-NEXT:    [[_7:%.*]] = icmp ult i64 0, [[SHAPES_1]]
+; CHECK-NEXT:    ret void
+;
+start:
+  %_7 = icmp ult i64 0, %shapes.1
+  ret void
+
+dispatch:
+  %cs = catchswitch within none [label %funclet] unwind to caller
+
+funclet:
+  %cleanuppad = catchpad within %cs []
+  br label %funclet_end
+
+
+funclet_end:
+  catchret from %cleanuppad to label %unreachable
+
+unreachable:
+  unreachable
+}
+
+define void @unreachable_catchpad_multiple_predecessors(i64 %shapes.1) personality ptr null {
+; CHECK-LABEL: define void @unreachable_catchpad_multiple_predecessors(
+; CHECK-SAME: i64 [[SHAPES_1:%.*]]) personality ptr null {
+; CHECK-NEXT:  [[START:.*:]]
+; CHECK-NEXT:    [[_7:%.*]] = icmp ult i64 0, [[SHAPES_1]]
+; CHECK-NEXT:    ret void
+;
+start:
+  %_7 = icmp ult i64 0, %shapes.1
+  ret void
+
+dispatch:
+  %cs = catchswitch within none [label %funclet] unwind to caller
+
+funclet:
+  %cleanuppad = catchpad within %cs []
+  switch i64 %shapes.1, label %otherwise [ i64 0, label %one
+  i64 1, label %two
+  i64 42, label %three ]
+one:
+  br label %funclet_end
+
+two:
+  br label %funclet_end
+
+three:
+  br label %funclet_end
+
+otherwise:
+  br label %funclet_end
+
+funclet_end:
+  catchret from %cleanuppad to label %unreachable
+
+unreachable:
+  unreachable
+}
+
+; Issue reproducer
+
+define void @gh148052(i64 %shapes.1) personality ptr null {
+; CHECK-LABEL: define void @gh148052(
+; CHECK-SAME: i64 [[SHAPES_1:%.*]]) personality ptr null {
+; CHECK-NEXT:  [[START:.*:]]
+; CHECK-NEXT:    [[_7:%.*]] = icmp ult i64 0, [[SHAPES_1]]
+; CHECK-NEXT:    call void @llvm.assume(i1 [[_7]])
+; CHECK-NEXT:    ret void
+;
+start:
+  %_7 = icmp ult i64 0, %shapes.1
+  br i1 %_7, label %bb1, label %panic
+
+bb1:
+  %_11 = icmp ult i64 0, %shapes.1
+  br i1 %_11, label %bb3, label %panic1
+
+panic:
+  unreachable
+
+bb3:
+  ret void
+
+panic1:
+  invoke void @func(i64 0, i64 0, ptr null)
+  to label %unreachable unwind label %funclet_bb14
+
+funclet_bb14:
+  %cleanuppad = cleanuppad within none []
+  br label %bb13
+
+unreachable:
+  unreachable
+
+bb10:
+  cleanupret from %cleanuppad5 unwind to caller
+
+funclet_bb10:
+  %cleanuppad5 = cleanuppad within none []
+  br label %bb10
+
+bb13:
+  cleanupret from %cleanuppad unwind label %funclet_bb10
+}
+
+declare void @func(i64, i64, ptr)

>From 063fccfb8ff09c791dae4599810f5a1040b0a57a Mon Sep 17 00:00:00 2001
From: Gabor Spaits <gaborspaits1 at gmail.com>
Date: Tue, 9 Sep 2025 22:56:10 +0200
Subject: [PATCH 2/5] Address the case when catchswitch leaves poisoned pads

---
 llvm/lib/Transforms/Utils/BasicBlockUtils.cpp | 31 +++++++++++++------
 1 file changed, 21 insertions(+), 10 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
index 3515374a58ca1..954313cea2aef 100644
--- a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -58,6 +58,18 @@ static cl::opt<unsigned> MaxDeoptOrUnreachableSuccessorCheckDepth(
              "is followed by a block that either has a terminating "
              "deoptimizing call or is terminated with an unreachable"));
 
+static void replaceEHPadsRetWithUnreachable(Instruction &I) {
+  for (User *User : make_early_inc_range(I.users())) {
+    Instruction *ReturnInstr = dyn_cast<Instruction>(User);
+    if (isa<CatchReturnInst>(ReturnInstr) ||
+        isa<CleanupReturnInst>(ReturnInstr)) {
+      BasicBlock *ReturnInstrBB = ReturnInstr->getParent();
+      ReturnInstr->eraseFromParent();
+      new UnreachableInst(ReturnInstrBB->getContext(), ReturnInstrBB);
+    }
+  }
+}
+
 void llvm::detachDeadBlocks(
     ArrayRef<BasicBlock *> BBs,
     SmallVectorImpl<DominatorTree::UpdateType> *Updates,
@@ -82,16 +94,15 @@ void llvm::detachDeadBlocks(
       // first block, the we would have possible cleanupret and catchret
       // instructions with poison arguments, which wouldn't be valid.
       unsigned OpCode = I.getOpcode();
-      if (OpCode == Instruction::CatchPad ||
-          OpCode == Instruction::CleanupPad) {
-        for (User *User : make_early_inc_range(I.users())) {
-          Instruction *ReturnInstr = dyn_cast<Instruction>(User);
-          if (isa<CatchReturnInst>(ReturnInstr) ||
-              isa<CleanupReturnInst>(ReturnInstr)) {
-            BasicBlock *ReturnInstrBB = ReturnInstr->getParent();
-            ReturnInstr->eraseFromParent();
-            new UnreachableInst(ReturnInstrBB->getContext(), ReturnInstrBB);
-          }
+      if (isa<FuncletPadInst>(I)) {
+        replaceEHPadsRetWithUnreachable(I);
+      }
+      if (OpCode == Instruction::CatchSwitch) {
+        CatchSwitchInst &CSI = cast<CatchSwitchInst>(I);
+        for (unsigned I = 0; I < CSI.getNumSuccessors(); I++) {
+          BasicBlock *SucBlock = CSI.getSuccessor(I);
+          Instruction &PadInstr = *(SucBlock->getFirstNonPHIIt());
+          replaceEHPadsRetWithUnreachable(PadInstr);
         }
       }
       // Because control flow can't get here, we don't care what we replace the

>From 0376e58245b52400cbd17906f4cea1374e954fc2 Mon Sep 17 00:00:00 2001
From: Gabor Spaits <gaborspaits1 at gmail.com>
Date: Wed, 10 Sep 2025 13:01:51 +0200
Subject: [PATCH 3/5] Clean up and add assertion

---
 llvm/lib/Transforms/Utils/BasicBlockUtils.cpp | 26 +++++++++++--------
 1 file changed, 15 insertions(+), 11 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
index 954313cea2aef..9b43200fdbbf3 100644
--- a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -58,7 +58,8 @@ static cl::opt<unsigned> MaxDeoptOrUnreachableSuccessorCheckDepth(
              "is followed by a block that either has a terminating "
              "deoptimizing call or is terminated with an unreachable"));
 
-static void replaceEHPadsRetWithUnreachable(Instruction &I) {
+static void replaceFuncletPadsRetWithUnreachable(Instruction &I) {
+  assert(isa<FuncletPadInst>(I) && "Instruction must be a funclet pad!");
   for (User *User : make_early_inc_range(I.users())) {
     Instruction *ReturnInstr = dyn_cast<Instruction>(User);
     if (isa<CatchReturnInst>(ReturnInstr) ||
@@ -93,16 +94,19 @@ void llvm::detachDeadBlocks(
       // basic blocks than the pad instruction. If we would only delete the
       // first block, the we would have possible cleanupret and catchret
       // instructions with poison arguments, which wouldn't be valid.
-      unsigned OpCode = I.getOpcode();
-      if (isa<FuncletPadInst>(I)) {
-        replaceEHPadsRetWithUnreachable(I);
-      }
-      if (OpCode == Instruction::CatchSwitch) {
-        CatchSwitchInst &CSI = cast<CatchSwitchInst>(I);
-        for (unsigned I = 0; I < CSI.getNumSuccessors(); I++) {
-          BasicBlock *SucBlock = CSI.getSuccessor(I);
-          Instruction &PadInstr = *(SucBlock->getFirstNonPHIIt());
-          replaceEHPadsRetWithUnreachable(PadInstr);
+      if (isa<FuncletPadInst>(I))
+        replaceFuncletPadsRetWithUnreachable(I);
+
+      // Catchswitch instructions have handlers, that must be catchpads.
+      CatchSwitchInst *CSI = dyn_cast<CatchSwitchInst>(&I);
+      if (CSI) {
+        BasicBlock *UnwindDest = CSI->getUnwindDest();
+        for (unsigned I = 0; I < CSI->getNumSuccessors(); I++) {
+          BasicBlock *SucBlock = CSI->getSuccessor(I);
+          if (SucBlock != UnwindDest) {
+            Instruction &PadInstr = *(SucBlock->getFirstNonPHIIt());
+            replaceFuncletPadsRetWithUnreachable(PadInstr);
+          }
         }
       }
       // Because control flow can't get here, we don't care what we replace the

>From 0893d423323f9c084a8526390061c5041ab853b3 Mon Sep 17 00:00:00 2001
From: Gabor Spaits <gaborspaits1 at gmail.com>
Date: Wed, 10 Sep 2025 23:20:25 +0200
Subject: [PATCH 4/5] Handle catchswitch's unwind

---
 llvm/lib/Transforms/Utils/BasicBlockUtils.cpp | 22 +++++++++++++------
 1 file changed, 15 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
index 9b43200fdbbf3..8787d0bc438d5 100644
--- a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -97,18 +97,26 @@ void llvm::detachDeadBlocks(
       if (isa<FuncletPadInst>(I))
         replaceFuncletPadsRetWithUnreachable(I);
 
-      // Catchswitch instructions have handlers, that must be catchpads.
+      // Catchswitch instructions have handlers, that must be catchpads and
+      // an unwind label, that is either a catchpad or catchswitch.
       CatchSwitchInst *CSI = dyn_cast<CatchSwitchInst>(&I);
-      if (CSI) {
-        BasicBlock *UnwindDest = CSI->getUnwindDest();
+      if (CSI)
+        // Iterating over the handlers and the unwind basic block and precssing
+        // catchpads. If the unwind label is a catchswitch, we just replace the
+        // label wit poison later on.
         for (unsigned I = 0; I < CSI->getNumSuccessors(); I++) {
           BasicBlock *SucBlock = CSI->getSuccessor(I);
-          if (SucBlock != UnwindDest) {
-            Instruction &PadInstr = *(SucBlock->getFirstNonPHIIt());
-            replaceFuncletPadsRetWithUnreachable(PadInstr);
+          Instruction &SucFstInst = *(SucBlock->getFirstNonPHIIt());
+          if (isa<FuncletPadInst>(SucFstInst)) {
+            replaceFuncletPadsRetWithUnreachable(SucFstInst);
+            // There may be catchswitch instructions using the catchpad.
+            // Just replace those with poison.
+            if (!SucFstInst.use_empty())
+              SucFstInst.replaceAllUsesWith(
+                  PoisonValue::get(SucFstInst.getType()));
+            SucFstInst.eraseFromParent();
           }
         }
-      }
       // Because control flow can't get here, we don't care what we replace the
       // value with.  Note that since this block is unreachable, and all values
       // contained within it must dominate their uses, that all uses will

>From 5e2fc0068fc3d3e6d8b1efb40a2f5796fc9aab32 Mon Sep 17 00:00:00 2001
From: Gabor Spaits <gaborspaits1 at gmail.com>
Date: Thu, 11 Sep 2025 09:51:12 +0200
Subject: [PATCH 5/5] Formating and typo

---
 llvm/lib/Transforms/Utils/BasicBlockUtils.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
index 8787d0bc438d5..d656217d6e01c 100644
--- a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -99,11 +99,10 @@ void llvm::detachDeadBlocks(
 
       // Catchswitch instructions have handlers, that must be catchpads and
       // an unwind label, that is either a catchpad or catchswitch.
-      CatchSwitchInst *CSI = dyn_cast<CatchSwitchInst>(&I);
-      if (CSI)
-        // Iterating over the handlers and the unwind basic block and precssing
+      if (CatchSwitchInst *CSI = dyn_cast<CatchSwitchInst>(&I)) {
+        // Iterating over the handlers and the unwind basic block and processing
         // catchpads. If the unwind label is a catchswitch, we just replace the
-        // label wit poison later on.
+        // label with poison later on.
         for (unsigned I = 0; I < CSI->getNumSuccessors(); I++) {
           BasicBlock *SucBlock = CSI->getSuccessor(I);
           Instruction &SucFstInst = *(SucBlock->getFirstNonPHIIt());
@@ -117,6 +116,7 @@ void llvm::detachDeadBlocks(
             SucFstInst.eraseFromParent();
           }
         }
+      }
       // Because control flow can't get here, we don't care what we replace the
       // value with.  Note that since this block is unreachable, and all values
       // contained within it must dominate their uses, that all uses will



More information about the llvm-commits mailing list