[llvm] 9679735 - [AMDGPU] Don't unify divergent exit nodes with `musttail` calls (#126395)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Feb 9 18:48:28 PST 2025
Author: Shilei Tian
Date: 2025-02-09T21:48:24-05:00
New Revision: 967973512b9eba99dd8b04db42dbafcc50d94728
URL: https://github.com/llvm/llvm-project/commit/967973512b9eba99dd8b04db42dbafcc50d94728
DIFF: https://github.com/llvm/llvm-project/commit/967973512b9eba99dd8b04db42dbafcc50d94728.diff
LOG: [AMDGPU] Don't unify divergent exit nodes with `musttail` calls (#126395)
Fixes SWDEV-512254.
Added:
llvm/test/CodeGen/AMDGPU/do-not-unify-divergent-exit-nodes-with-musttail.ll
Modified:
llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp b/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp
index fda2a38c2464e00..d087fbc86545c99 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp
@@ -215,7 +215,10 @@ bool AMDGPUUnifyDivergentExitNodesImpl::run(Function &F, DominatorTree *DT,
PDT.roots(), [&](auto BB) { return !isUniformlyReached(UA, *BB); });
for (BasicBlock *BB : PDT.roots()) {
- if (isa<ReturnInst>(BB->getTerminator())) {
+ if (auto *RI = dyn_cast<ReturnInst>(BB->getTerminator())) {
+ auto *CI = dyn_cast_or_null<CallInst>(RI->getPrevNode());
+ if (CI && CI->isMustTailCall())
+ continue;
if (HasDivergentExitBlock)
ReturningBlocks.push_back(BB);
} else if (isa<UnreachableInst>(BB->getTerminator())) {
diff --git a/llvm/test/CodeGen/AMDGPU/do-not-unify-divergent-exit-nodes-with-musttail.ll b/llvm/test/CodeGen/AMDGPU/do-not-unify-divergent-exit-nodes-with-musttail.ll
new file mode 100644
index 000000000000000..007e3f0a6bdbc5c
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/do-not-unify-divergent-exit-nodes-with-musttail.ll
@@ -0,0 +1,53 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -passes=amdgpu-unify-divergent-exit-nodes -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a %s -o - | FileCheck %s
+
+declare void @foo(ptr)
+declare i1 @bar(ptr)
+
+define void @musttail_call_without_return_value(ptr %p) {
+; CHECK-LABEL: define void @musttail_call_without_return_value(
+; CHECK-SAME: ptr [[P:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[LOAD:%.*]] = load i1, ptr [[P]], align 1
+; CHECK-NEXT: br i1 [[LOAD]], label %[[BB_0:.*]], label %[[BB_1:.*]]
+; CHECK: [[BB_0]]:
+; CHECK-NEXT: musttail call void @foo(ptr [[P]])
+; CHECK-NEXT: ret void
+; CHECK: [[BB_1]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ %load = load i1, ptr %p, align 1
+ br i1 %load, label %bb.0, label %bb.1
+
+bb.0:
+ musttail call void @foo(ptr %p)
+ ret void
+
+bb.1:
+ ret void
+}
+
+define i1 @musttail_call_with_return_value(ptr %p) {
+; CHECK-LABEL: define i1 @musttail_call_with_return_value(
+; CHECK-SAME: ptr [[P:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[LOAD:%.*]] = load i1, ptr [[P]], align 1
+; CHECK-NEXT: br i1 [[LOAD]], label %[[BB_0:.*]], label %[[BB_1:.*]]
+; CHECK: [[BB_0]]:
+; CHECK-NEXT: [[RET:%.*]] = musttail call i1 @bar(ptr [[P]])
+; CHECK-NEXT: ret i1 [[RET]]
+; CHECK: [[BB_1]]:
+; CHECK-NEXT: ret i1 [[LOAD]]
+;
+entry:
+ %load = load i1, ptr %p, align 1
+ br i1 %load, label %bb.0, label %bb.1
+
+bb.0:
+ %ret = musttail call i1 @bar(ptr %p)
+ ret i1 %ret
+
+bb.1:
+ ret i1 %load
+}
More information about the llvm-commits mailing list