[PATCH] D71192: AMDGPU: Fix AMDGPUUnifyDivergentExitNodes with no normal returns

Connor Abbott via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Thu Jan 30 02:08:25 PST 2020


This revision was automatically updated to reflect the committed changes.
Closed by commit rGce06d50756e9: AMDGPU: Fix AMDGPUUnifyDivergentExitNodes with no normal returns (authored by cwabbott).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D71192/new/

https://reviews.llvm.org/D71192

Files:
  llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp
  llvm/test/CodeGen/AMDGPU/kill-infinite-loop.ll
  llvm/test/CodeGen/AMDGPU/update-phi.ll


Index: llvm/test/CodeGen/AMDGPU/update-phi.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/update-phi.ll
+++ llvm/test/CodeGen/AMDGPU/update-phi.ll
@@ -14,12 +14,13 @@
 ; IR-NEXT:    [[DOT01:%.*]] = phi float [ 0.000000e+00, [[DOTLOOPEXIT]] ], [ [[N29:%.*]], [[TRANSITIONBLOCK:%.*]] ]
 ; IR-NEXT:    [[N29]] = fadd float [[DOT01]], 1.000000e+00
 ; IR-NEXT:    [[N30:%.*]] = fcmp ogt float [[N29]], 4.000000e+00
-; IR-NEXT:    br i1 true, label [[TRANSITIONBLOCK]], label [[DUMMYRETURNBLOCK:%.*]]
+; IR-NEXT:    br i1 true, label [[TRANSITIONBLOCK]], label [[UNIFIEDRETURNBLOCK:%.*]]
 ; IR:       TransitionBlock:
 ; IR-NEXT:    br i1 [[N30]], label [[DOTLOOPEXIT]], label [[N28]]
 ; IR:       n31:
 ; IR-NEXT:    ret void
-; IR:       DummyReturnBlock:
+; IR:       UnifiedReturnBlock:
+; IR-NEXT:    call void @llvm.amdgcn.exp.f32(i32 9, i32 0, float undef, float undef, float undef, float undef, i1 true, i1 true)
 ; IR-NEXT:    ret void
 ;
 .entry:
Index: llvm/test/CodeGen/AMDGPU/kill-infinite-loop.ll
===================================================================
--- llvm/test/CodeGen/AMDGPU/kill-infinite-loop.ll
+++ llvm/test/CodeGen/AMDGPU/kill-infinite-loop.ll
@@ -45,6 +45,22 @@
   ret void
 }
 
+; Test the case where there's only a kill in an infinite loop.
+; CHECK-LABEL: only_kill
+; CHECK: exp null off, off, off, off done vm
+; CHECK-NEXT: s_endpgm
+; SIInsertSkips inserts an extra null export here, but it should be harmless.
+; CHECK: exp null off, off, off, off done vm
+; CHECK-NEXT: s_endpgm
+define amdgpu_ps void @only_kill() #0 {
+main_body:
+  br label %loop
+
+loop:
+  call void @llvm.amdgcn.kill(i1 false) #3
+  br label %loop
+}
+
 ; In case there's an epilog, we shouldn't have to do this.
 ; CHECK-LABEL: return_nonvoid
 ; CHECK-NOT: exp null off, off, off, off done vm
Index: llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp
@@ -195,7 +195,12 @@
 
 bool AMDGPUUnifyDivergentExitNodes::runOnFunction(Function &F) {
   auto &PDT = getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
-  if (PDT.getRoots().size() <= 1)
+
+  // If there's only one exit, we don't need to do anything, unless this is a
+  // pixel shader and that exit is an infinite loop, since we still have to
+  // insert an export in that case.
+  if (PDT.getRoots().size() <= 1 &&
+      F.getCallingConv() != CallingConv::AMDGPU_PS)
     return false;
 
   LegacyDivergenceAnalysis &DA = getAnalysis<LegacyDivergenceAnalysis>();
@@ -321,7 +326,7 @@
   if (ReturningBlocks.empty())
     return false; // No blocks return
 
-  if (ReturningBlocks.size() == 1)
+  if (ReturningBlocks.size() == 1 && !InsertExport)
     return false; // Already has a single return block
 
   const TargetTransformInfo &TTI


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D71192.241372.patch
Type: text/x-patch
Size: 2970 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20200130/86586e55/attachment.bin>


More information about the llvm-commits mailing list