[llvm] [AMDGPU] Don't unify divergent exit nodes with `musttail` calls (PR #126395)
Shilei Tian via llvm-commits
llvm-commits at lists.llvm.org
Sun Feb 9 07:13:13 PST 2025
https://github.com/shiltian updated https://github.com/llvm/llvm-project/pull/126395
From aa55874411831b1e3975818ec2d652ec8dd7acc4 Mon Sep 17 00:00:00 2001
From: Shilei Tian <i at tianshilei.me>
Date: Sun, 9 Feb 2025 10:12:57 -0500
Subject: [PATCH] [AMDGPU] Don't unify divergent exit nodes with musttail calls
Fixes SWDEV-512254.
---
.../AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp | 5 +-
...nify-divergent-exit-nodes-with-musttail.ll | 53 +++++++++++++++++++
2 files changed, 57 insertions(+), 1 deletion(-)
create mode 100644 llvm/test/CodeGen/AMDGPU/do-not-unify-divergent-exit-nodes-with-musttail.ll
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp b/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp
index fda2a38c2464e00..d087fbc86545c99 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp
@@ -215,7 +215,10 @@ bool AMDGPUUnifyDivergentExitNodesImpl::run(Function &F, DominatorTree *DT,
PDT.roots(), [&](auto BB) { return !isUniformlyReached(UA, *BB); });
for (BasicBlock *BB : PDT.roots()) {
- if (isa<ReturnInst>(BB->getTerminator())) {
+ if (auto *RI = dyn_cast<ReturnInst>(BB->getTerminator())) {
+ auto *CI = dyn_cast_or_null<CallInst>(RI->getPrevNode());
+ if (CI && CI->isMustTailCall())
+ continue;
if (HasDivergentExitBlock)
ReturningBlocks.push_back(BB);
} else if (isa<UnreachableInst>(BB->getTerminator())) {
diff --git a/llvm/test/CodeGen/AMDGPU/do-not-unify-divergent-exit-nodes-with-musttail.ll b/llvm/test/CodeGen/AMDGPU/do-not-unify-divergent-exit-nodes-with-musttail.ll
new file mode 100644
index 000000000000000..007e3f0a6bdbc5c
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/do-not-unify-divergent-exit-nodes-with-musttail.ll
@@ -0,0 +1,53 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -passes=amdgpu-unify-divergent-exit-nodes -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a %s -o - | FileCheck %s
+
+declare void @foo(ptr)
+declare i1 @bar(ptr)
+
+define void @musttail_call_without_return_value(ptr %p) {
+; CHECK-LABEL: define void @musttail_call_without_return_value(
+; CHECK-SAME: ptr [[P:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[LOAD:%.*]] = load i1, ptr [[P]], align 1
+; CHECK-NEXT: br i1 [[LOAD]], label %[[BB_0:.*]], label %[[BB_1:.*]]
+; CHECK: [[BB_0]]:
+; CHECK-NEXT: musttail call void @foo(ptr [[P]])
+; CHECK-NEXT: ret void
+; CHECK: [[BB_1]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ %load = load i1, ptr %p, align 1
+ br i1 %load, label %bb.0, label %bb.1
+
+bb.0:
+ musttail call void @foo(ptr %p)
+ ret void
+
+bb.1:
+ ret void
+}
+
+define i1 @musttail_call_with_return_value(ptr %p) {
+; CHECK-LABEL: define i1 @musttail_call_with_return_value(
+; CHECK-SAME: ptr [[P:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[LOAD:%.*]] = load i1, ptr [[P]], align 1
+; CHECK-NEXT: br i1 [[LOAD]], label %[[BB_0:.*]], label %[[BB_1:.*]]
+; CHECK: [[BB_0]]:
+; CHECK-NEXT: [[RET:%.*]] = musttail call i1 @bar(ptr [[P]])
+; CHECK-NEXT: ret i1 [[RET]]
+; CHECK: [[BB_1]]:
+; CHECK-NEXT: ret i1 [[LOAD]]
+;
+entry:
+ %load = load i1, ptr %p, align 1
+ br i1 %load, label %bb.0, label %bb.1
+
+bb.0:
+ %ret = musttail call i1 @bar(ptr %p)
+ ret i1 %ret
+
+bb.1:
+ ret i1 %load
+}
More information about the llvm-commits mailing list