[llvm] 9679735 - [AMDGPU] Don't unify divergent exit nodes with `musttail` calls (#126395)

via llvm-commits llvm-commits at lists.llvm.org
Sun Feb 9 18:48:28 PST 2025


Author: Shilei Tian
Date: 2025-02-09T21:48:24-05:00
New Revision: 967973512b9eba99dd8b04db42dbafcc50d94728

URL: https://github.com/llvm/llvm-project/commit/967973512b9eba99dd8b04db42dbafcc50d94728
DIFF: https://github.com/llvm/llvm-project/commit/967973512b9eba99dd8b04db42dbafcc50d94728.diff

LOG: [AMDGPU] Don't unify divergent exit nodes with `musttail` calls (#126395)

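Fixes SWDEV-512254.

LLVM IR requires a `musttail` call to be followed directly by its `ret`, so
a returning block whose `ret` is immediately preceded by a `musttail` call is
now skipped instead of being added to `ReturningBlocks` for unification.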

Added: 
    llvm/test/CodeGen/AMDGPU/do-not-unify-divergent-exit-nodes-with-musttail.ll

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp b/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp
index fda2a38c2464e00..d087fbc86545c99 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp
@@ -215,7 +215,10 @@ bool AMDGPUUnifyDivergentExitNodesImpl::run(Function &F, DominatorTree *DT,
       PDT.roots(), [&](auto BB) { return !isUniformlyReached(UA, *BB); });
 
   for (BasicBlock *BB : PDT.roots()) {
-    if (isa<ReturnInst>(BB->getTerminator())) {
+    if (auto *RI = dyn_cast<ReturnInst>(BB->getTerminator())) {
+      auto *CI = dyn_cast_or_null<CallInst>(RI->getPrevNode());
+      if (CI && CI->isMustTailCall())
+        continue;
       if (HasDivergentExitBlock)
         ReturningBlocks.push_back(BB);
     } else if (isa<UnreachableInst>(BB->getTerminator())) {

diff  --git a/llvm/test/CodeGen/AMDGPU/do-not-unify-divergent-exit-nodes-with-musttail.ll b/llvm/test/CodeGen/AMDGPU/do-not-unify-divergent-exit-nodes-with-musttail.ll
new file mode 100644
index 000000000000000..007e3f0a6bdbc5c
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/do-not-unify-divergent-exit-nodes-with-musttail.ll
@@ -0,0 +1,53 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -passes=amdgpu-unify-divergent-exit-nodes -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a %s -o - | FileCheck %s
+
+declare void @foo(ptr)
+declare i1 @bar(ptr)
+
+define void @musttail_call_without_return_value(ptr %p) {
+; CHECK-LABEL: define void @musttail_call_without_return_value(
+; CHECK-SAME: ptr [[P:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[LOAD:%.*]] = load i1, ptr [[P]], align 1
+; CHECK-NEXT:    br i1 [[LOAD]], label %[[BB_0:.*]], label %[[BB_1:.*]]
+; CHECK:       [[BB_0]]:
+; CHECK-NEXT:    musttail call void @foo(ptr [[P]])
+; CHECK-NEXT:    ret void
+; CHECK:       [[BB_1]]:
+; CHECK-NEXT:    ret void
+;
+entry:
+  %load = load i1, ptr %p, align 1
+  br i1 %load, label %bb.0, label %bb.1
+
+bb.0:
+  musttail call void @foo(ptr %p)
+  ret void
+
+bb.1:
+  ret void
+}
+
+define i1 @musttail_call_with_return_value(ptr %p) {
+; CHECK-LABEL: define i1 @musttail_call_with_return_value(
+; CHECK-SAME: ptr [[P:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[LOAD:%.*]] = load i1, ptr [[P]], align 1
+; CHECK-NEXT:    br i1 [[LOAD]], label %[[BB_0:.*]], label %[[BB_1:.*]]
+; CHECK:       [[BB_0]]:
+; CHECK-NEXT:    [[RET:%.*]] = musttail call i1 @bar(ptr [[P]])
+; CHECK-NEXT:    ret i1 [[RET]]
+; CHECK:       [[BB_1]]:
+; CHECK-NEXT:    ret i1 [[LOAD]]
+;
+entry:
+  %load = load i1, ptr %p, align 1
+  br i1 %load, label %bb.0, label %bb.1
+
+bb.0:
+  %ret = musttail call i1 @bar(ptr %p)
+  ret i1 %ret
+
+bb.1:
+  ret i1 %load
+}


        


More information about the llvm-commits mailing list