[llvm] [AMDGPU][GlobalISel] Fix / workaround amdgcn.kill/.unreachable lowering (PR #170639)

Robert Imschweiler via llvm-commits llvm-commits at lists.llvm.org
Thu Dec 4 02:48:32 PST 2025


https://github.com/ro-i created https://github.com/llvm/llvm-project/pull/170639

cf. https://github.com/llvm/llvm-project/pull/133907#issuecomment-3611354688

>From 0e517e118df4ffb38064f9421da3dd3c5703c2c2 Mon Sep 17 00:00:00 2001
From: Robert Imschweiler <robert.imschweiler at amd.com>
Date: Thu, 4 Dec 2025 04:45:21 -0600
Subject: [PATCH] [AMDGPU][GlobalISel] Fix / workaround
 amdgcn.kill/.unreachable lowering

cf. https://github.com/llvm/llvm-project/pull/133907#issuecomment-3611354688
---
 llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp  | 25 ++++++++++++++-----
 llvm/test/CodeGen/AMDGPU/callbr-intrinsics.ll | 22 +++++++++++++---
 2 files changed, 37 insertions(+), 10 deletions(-)

diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
index d9a7e898076b3..7fff97f8336a0 100644
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -3088,18 +3088,31 @@ bool IRTranslator::translateCallBr(const User &U,
     return false;
 
   // Retrieve successors.
-  SmallPtrSet<BasicBlock *, 8> Dests = {I.getDefaultDest()};
+  SmallPtrSet<BasicBlock *, 8> Dests;
+  Dests.insert(I.getDefaultDest());
   MachineBasicBlock *Return = &getMBB(*I.getDefaultDest());
 
   // Update successor info.
   addSuccessorWithProb(CallBrMBB, Return, BranchProbability::getOne());
-  // TODO: For most of the cases where there is an intrinsic callbr, we're
-  // having exactly one indirect target, which will be unreachable. As soon as
-  // this changes, we might need to enhance
-  // Target->setIsInlineAsmBrIndirectTarget or add something similar for
-  // intrinsic indirect branches.
+
+  // Add indirect targets as successors. For intrinsic callbr, these represent
+  // implicit control flow (e.g., the "kill" path for amdgcn.kill). We mark them
+  // with setIsInlineAsmBrIndirectTarget so the machine verifier accepts them as
+  // valid successors, even though they're not from inline asm.
+  for (BasicBlock *Dest : I.getIndirectDests()) {
+    MachineBasicBlock *Target = &getMBB(*Dest);
+    Target->setIsInlineAsmBrIndirectTarget();
+    Target->setLabelMustBeEmitted();
+    // Don't add duplicate machine successors.
+    if (Dests.insert(Dest).second)
+      addSuccessorWithProb(CallBrMBB, Target, BranchProbability::getZero());
+  }
+
   CallBrMBB->normalizeSuccProbs();
 
+  // Drop into default successor.
+  MIRBuilder.buildBr(*Return);
+
   return true;
 }
 
diff --git a/llvm/test/CodeGen/AMDGPU/callbr-intrinsics.ll b/llvm/test/CodeGen/AMDGPU/callbr-intrinsics.ll
index ff1e23836fed2..8ac31b3c70ed7 100644
--- a/llvm/test/CodeGen/AMDGPU/callbr-intrinsics.ll
+++ b/llvm/test/CodeGen/AMDGPU/callbr-intrinsics.ll
@@ -32,14 +32,21 @@ define void @test_kill(ptr %src, ptr %dst, i1 %c) {
 ; GISEL-NEXT:    s_mov_b64 s[4:5], exec
 ; GISEL-NEXT:    s_andn2_b64 s[6:7], exec, vcc
 ; GISEL-NEXT:    s_andn2_b64 s[4:5], s[4:5], s[6:7]
-; GISEL-NEXT:    s_cbranch_scc0 .LBB0_2
+; GISEL-NEXT:    s_cbranch_scc0 .LBB0_4
 ; GISEL-NEXT:  ; %bb.1:
 ; GISEL-NEXT:    s_and_b64 exec, exec, s[4:5]
+; GISEL-NEXT:  ; %bb.2: ; %cont
 ; GISEL-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GISEL-NEXT:    flat_store_dword v[2:3], v0
 ; GISEL-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GISEL-NEXT:    s_setpc_b64 s[30:31]
-; GISEL-NEXT:  .LBB0_2:
+; GISEL-NEXT:  .LBB0_3: ; Inline asm indirect target
+; GISEL-NEXT:    ; %kill
+; GISEL-NEXT:    ; Label of block must be emitted
+; GISEL-NEXT:    ; divergent unreachable
+; GISEL-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+; GISEL-NEXT:  .LBB0_4:
 ; GISEL-NEXT:    s_mov_b64 exec, 0
 ; GISEL-NEXT:    s_endpgm
   %a = load i32, ptr %src, align 4
@@ -81,14 +88,21 @@ define void @test_kill_block_order(ptr %src, ptr %dst, i1 %c) {
 ; GISEL-NEXT:    s_mov_b64 s[4:5], exec
 ; GISEL-NEXT:    s_andn2_b64 s[6:7], exec, vcc
 ; GISEL-NEXT:    s_andn2_b64 s[4:5], s[4:5], s[6:7]
-; GISEL-NEXT:    s_cbranch_scc0 .LBB1_2
+; GISEL-NEXT:    s_cbranch_scc0 .LBB1_4
 ; GISEL-NEXT:  ; %bb.1:
 ; GISEL-NEXT:    s_and_b64 exec, exec, s[4:5]
+; GISEL-NEXT:  ; %bb.2: ; %cont
 ; GISEL-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GISEL-NEXT:    flat_store_dword v[2:3], v0
 ; GISEL-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
 ; GISEL-NEXT:    s_setpc_b64 s[30:31]
-; GISEL-NEXT:  .LBB1_2:
+; GISEL-NEXT:  .LBB1_3: ; Inline asm indirect target
+; GISEL-NEXT:    ; %kill
+; GISEL-NEXT:    ; Label of block must be emitted
+; GISEL-NEXT:    ; divergent unreachable
+; GISEL-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+; GISEL-NEXT:  .LBB1_4:
 ; GISEL-NEXT:    s_mov_b64 exec, 0
 ; GISEL-NEXT:    s_endpgm
   %a = load i32, ptr %src, align 4



More information about the llvm-commits mailing list