[llvm] 9dcd75f - [AMDGPU] Allow frontends to disable null export for pixel shaders

Carl Ritson via llvm-commits llvm-commits at lists.llvm.org
Wed Jul 21 18:21:18 PDT 2021


Author: Carl Ritson
Date: 2021-07-22T10:20:46+09:00
New Revision: 9dcd75f86f2457b31a8eca4fa47f28d2b912dec4

URL: https://github.com/llvm/llvm-project/commit/9dcd75f86f2457b31a8eca4fa47f28d2b912dec4
DIFF: https://github.com/llvm/llvm-project/commit/9dcd75f86f2457b31a8eca4fa47f28d2b912dec4.diff

LOG: [AMDGPU] Allow frontends to disable null export for pixel shaders

Disable null export (for kills) when a frontend defines a pixel
shader as not exporting using amdgpu-color-export and
amdgpu-depth-export function attributes.
This allows the generation of export-free pixel shaders.

Reviewed By: foad

Differential Revision: https://reviews.llvm.org/D105683

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/SILateBranchLowering.cpp
    llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
    llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
    llvm/test/CodeGen/AMDGPU/early-term.mir

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/SILateBranchLowering.cpp b/llvm/lib/Target/AMDGPU/SILateBranchLowering.cpp
index 3fb96f15313d4..d560b477b8ba7 100644
--- a/llvm/lib/Target/AMDGPU/SILateBranchLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SILateBranchLowering.cpp
@@ -67,9 +67,19 @@ char &llvm::SILateBranchLoweringPassID = SILateBranchLowering::ID;
 
 static void generateEndPgm(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator I, DebugLoc DL,
-                           const SIInstrInfo *TII, bool IsPS) {
-  // "null export"
-  if (IsPS) {
+                           const SIInstrInfo *TII, MachineFunction &MF) {
+  const Function &F = MF.getFunction();
+  bool IsPS = F.getCallingConv() == CallingConv::AMDGPU_PS;
+
+  // Check if hardware has been configured to expect color or depth exports.
+  bool HasExports =
+      AMDGPU::getHasColorExport(F) || AMDGPU::getHasDepthExport(F);
+
+  // Prior to GFX10, hardware always expects at least one export for PS.
+  bool MustExport = !AMDGPU::isGFX10Plus(TII->getSubtarget());
+
+  if (IsPS && (HasExports || MustExport)) {
+    // Generate "null export" if hardware is expecting PS to export.
     BuildMI(MBB, I, DL, TII->get(AMDGPU::EXP_DONE))
         .addImm(AMDGPU::Exp::ET_NULL)
         .addReg(AMDGPU::VGPR0, RegState::Undef)
@@ -80,6 +90,7 @@ static void generateEndPgm(MachineBasicBlock &MBB,
         .addImm(0)  // compr
         .addImm(0); // en
   }
+
   // s_endpgm
   BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ENDPGM)).addImm(0);
 }
@@ -168,8 +179,7 @@ bool SILateBranchLowering::runOnMachineFunction(MachineFunction &MF) {
     BuildMI(*EarlyExitBlock, EarlyExitBlock->end(), DL, TII->get(MovOpc),
             ExecReg)
         .addImm(0);
-    generateEndPgm(*EarlyExitBlock, EarlyExitBlock->end(), DL, TII,
-                   MF.getFunction().getCallingConv() == CallingConv::AMDGPU_PS);
+    generateEndPgm(*EarlyExitBlock, EarlyExitBlock->end(), DL, TII, MF);
 
     for (MachineInstr *Instr : EarlyTermInstrs) {
       // Early termination in GS does nothing

diff  --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 8cd748eaec6b4..29bbf50cbfdc3 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -1344,6 +1344,17 @@ unsigned getInitialPSInputAddr(const Function &F) {
   return getIntegerAttribute(F, "InitialPSInputAddr", 0);
 }
 
+bool getHasColorExport(const Function &F) {
+  // As a safe default always respond as if PS has color exports.
+  return getIntegerAttribute(
+             F, "amdgpu-color-export",
+             F.getCallingConv() == CallingConv::AMDGPU_PS ? 1 : 0) != 0;
+}
+
+bool getHasDepthExport(const Function &F) {
+  return getIntegerAttribute(F, "amdgpu-depth-export", 0) != 0;
+}
+
 bool isShader(CallingConv::ID cc) {
   switch(cc) {
     case CallingConv::AMDGPU_VS:

diff  --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index 40344c0281a2f..72c872dec5ba7 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -689,6 +689,10 @@ uint64_t encodeMsg(uint64_t MsgId,
 
 unsigned getInitialPSInputAddr(const Function &F);
 
+bool getHasColorExport(const Function &F);
+
+bool getHasDepthExport(const Function &F);
+
 LLVM_READNONE
 bool isShader(CallingConv::ID CC);
 

diff  --git a/llvm/test/CodeGen/AMDGPU/early-term.mir b/llvm/test/CodeGen/AMDGPU/early-term.mir
index ffe56c4085fdd..a8a245b45af6d 100644
--- a/llvm/test/CodeGen/AMDGPU/early-term.mir
+++ b/llvm/test/CodeGen/AMDGPU/early-term.mir
@@ -1,5 +1,5 @@
-# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=si-late-branch-lowering -verify-machineinstrs  %s -o - | FileCheck -check-prefix=GFX10 %s
+# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=si-late-branch-lowering -verify-machineinstrs  %s -o - | FileCheck -check-prefixes=GCN,GFX9 %s
+# RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -run-pass=si-late-branch-lowering -verify-machineinstrs  %s -o - | FileCheck -check-prefixes=GCN,GFX10 %s
 
 --- |
   define amdgpu_ps void @early_term_scc0_end_block() {
@@ -21,6 +21,12 @@
   define amdgpu_cs void @early_term_scc0_cs() {
     ret void
   }
+
+  define amdgpu_ps void @early_term_no_export() #0 {
+    ret void
+  }
+
+  attributes #0 = { "amdgpu-color-export"="0" "amdgpu-depth-export"="0" }
 ...
 
 ---
@@ -30,21 +36,21 @@ liveins:
   - { reg: '$sgpr0' }
   - { reg: '$sgpr1' }
 body: |
-  ; GFX10-LABEL: name: early_term_scc0_end_block
-  ; GFX10: bb.0:
-  ; GFX10:   successors: %bb.1(0x80000000), %bb.2(0x00000000)
-  ; GFX10:   liveins: $sgpr0, $sgpr1
-  ; GFX10:   $vgpr0 = V_MOV_B32_e32 0, implicit $exec
-  ; GFX10:   dead $sgpr0 = S_AND_B32 $sgpr0, killed $sgpr1, implicit-def $scc
-  ; GFX10:   S_CBRANCH_SCC0 %bb.2, implicit $scc
-  ; GFX10: bb.1:
-  ; GFX10:   liveins: $vgpr0
-  ; GFX10:   EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
-  ; GFX10:   S_ENDPGM 0
-  ; GFX10: bb.2:
-  ; GFX10:   $exec_lo = S_MOV_B32 0
-  ; GFX10:   EXP_DONE 9, undef $vgpr0, undef $vgpr0, undef $vgpr0, undef $vgpr0, 1, 0, 0, implicit $exec
-  ; GFX10:   S_ENDPGM 0
+  ; GCN-LABEL: name: early_term_scc0_end_block
+  ; GCN: bb.0:
+  ; GCN:   successors: %bb.1(0x80000000), %bb.2(0x00000000)
+  ; GCN:   liveins: $sgpr0, $sgpr1
+  ; GCN:   $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+  ; GCN:   dead $sgpr0 = S_AND_B32 $sgpr0, killed $sgpr1, implicit-def $scc
+  ; GCN:   S_CBRANCH_SCC0 %bb.2, implicit $scc
+  ; GCN: bb.1:
+  ; GCN:   liveins: $vgpr0
+  ; GCN:   EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
+  ; GCN:   S_ENDPGM 0
+  ; GCN: bb.2:
+  ; GCN:   $exec = S_MOV_B64 0
+  ; GCN:   EXP_DONE 9, undef $vgpr0, undef $vgpr0, undef $vgpr0, undef $vgpr0, 1, 0, 0, implicit $exec
+  ; GCN:   S_ENDPGM 0
   bb.0:
     liveins: $sgpr0, $sgpr1
     successors: %bb.1
@@ -66,25 +72,25 @@ liveins:
   - { reg: '$sgpr0' }
   - { reg: '$sgpr1' }
 body: |
-  ; GFX10-LABEL: name: early_term_scc0_next_terminator
-  ; GFX10: bb.0:
-  ; GFX10:   successors: %bb.2(0x80000000), %bb.3(0x00000000)
-  ; GFX10:   liveins: $sgpr0, $sgpr1
-  ; GFX10:   $vgpr0 = V_MOV_B32_e32 0, implicit $exec
-  ; GFX10:   dead $sgpr0 = S_AND_B32 $sgpr0, killed $sgpr1, implicit-def $scc
-  ; GFX10:   S_CBRANCH_SCC0 %bb.3, implicit $scc
-  ; GFX10:   S_BRANCH %bb.2
-  ; GFX10: bb.1:
-  ; GFX10:   successors: %bb.2(0x80000000)
-  ; GFX10:   $vgpr0 = V_MOV_B32_e32 1, implicit $exec
-  ; GFX10: bb.2:
-  ; GFX10:   liveins: $vgpr0
-  ; GFX10:   EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
-  ; GFX10:   S_ENDPGM 0
-  ; GFX10: bb.3:
-  ; GFX10:   $exec_lo = S_MOV_B32 0
-  ; GFX10:   EXP_DONE 9, undef $vgpr0, undef $vgpr0, undef $vgpr0, undef $vgpr0, 1, 0, 0, implicit $exec
-  ; GFX10:   S_ENDPGM 0
+  ; GCN-LABEL: name: early_term_scc0_next_terminator
+  ; GCN: bb.0:
+  ; GCN:   successors: %bb.2(0x80000000), %bb.3(0x00000000)
+  ; GCN:   liveins: $sgpr0, $sgpr1
+  ; GCN:   $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+  ; GCN:   dead $sgpr0 = S_AND_B32 $sgpr0, killed $sgpr1, implicit-def $scc
+  ; GCN:   S_CBRANCH_SCC0 %bb.3, implicit $scc
+  ; GCN:   S_BRANCH %bb.2
+  ; GCN: bb.1:
+  ; GCN:   successors: %bb.2(0x80000000)
+  ; GCN:   $vgpr0 = V_MOV_B32_e32 1, implicit $exec
+  ; GCN: bb.2:
+  ; GCN:   liveins: $vgpr0
+  ; GCN:   EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
+  ; GCN:   S_ENDPGM 0
+  ; GCN: bb.3:
+  ; GCN:   $exec = S_MOV_B64 0
+  ; GCN:   EXP_DONE 9, undef $vgpr0, undef $vgpr0, undef $vgpr0, undef $vgpr0, 1, 0, 0, implicit $exec
+  ; GCN:   S_ENDPGM 0
   bb.0:
     liveins: $sgpr0, $sgpr1
     successors: %bb.2
@@ -112,26 +118,26 @@ liveins:
   - { reg: '$sgpr0' }
   - { reg: '$sgpr1' }
 body: |
-  ; GFX10-LABEL: name: early_term_scc0_in_block
-  ; GFX10: bb.0:
-  ; GFX10:   successors: %bb.3(0x40000000), %bb.2(0x40000000)
-  ; GFX10:   liveins: $sgpr0, $sgpr1
-  ; GFX10:   $vgpr0 = V_MOV_B32_e32 0, implicit $exec
-  ; GFX10:   dead $sgpr0 = S_AND_B32 $sgpr0, killed $sgpr1, implicit-def $scc
-  ; GFX10:   S_CBRANCH_SCC0 %bb.2, implicit $scc
-  ; GFX10: bb.3:
-  ; GFX10:   successors: %bb.1(0x80000000)
-  ; GFX10:   liveins: $vgpr0, $scc
-  ; GFX10:   $vgpr1 = V_MOV_B32_e32 1, implicit $exec
-  ; GFX10: bb.1:
-  ; GFX10:   liveins: $vgpr0, $vgpr1
-  ; GFX10:   EXP 1, $vgpr1, $vgpr1, $vgpr1, $vgpr1, -1, -1, 15, implicit $exec
-  ; GFX10:   EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
-  ; GFX10:   S_ENDPGM 0
-  ; GFX10: bb.2:
-  ; GFX10:   $exec_lo = S_MOV_B32 0
-  ; GFX10:   EXP_DONE 9, undef $vgpr0, undef $vgpr0, undef $vgpr0, undef $vgpr0, 1, 0, 0, implicit $exec
-  ; GFX10:   S_ENDPGM 0
+  ; GCN-LABEL: name: early_term_scc0_in_block
+  ; GCN: bb.0:
+  ; GCN:   successors: %bb.3(0x40000000), %bb.2(0x40000000)
+  ; GCN:   liveins: $sgpr0, $sgpr1
+  ; GCN:   $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+  ; GCN:   dead $sgpr0 = S_AND_B32 $sgpr0, killed $sgpr1, implicit-def $scc
+  ; GCN:   S_CBRANCH_SCC0 %bb.2, implicit $scc
+  ; GCN: bb.3:
+  ; GCN:   successors: %bb.1(0x80000000)
+  ; GCN:   liveins: $vgpr0, $scc
+  ; GCN:   $vgpr1 = V_MOV_B32_e32 1, implicit $exec
+  ; GCN: bb.1:
+  ; GCN:   liveins: $vgpr0, $vgpr1
+  ; GCN:   EXP 1, $vgpr1, $vgpr1, $vgpr1, $vgpr1, -1, -1, 15, implicit $exec
+  ; GCN:   EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
+  ; GCN:   S_ENDPGM 0
+  ; GCN: bb.2:
+  ; GCN:   $exec = S_MOV_B64 0
+  ; GCN:   EXP_DONE 9, undef $vgpr0, undef $vgpr0, undef $vgpr0, undef $vgpr0, 1, 0, 0, implicit $exec
+  ; GCN:   S_ENDPGM 0
   bb.0:
     liveins: $sgpr0, $sgpr1
     successors: %bb.1
@@ -155,15 +161,18 @@ liveins:
   - { reg: '$sgpr0' }
   - { reg: '$sgpr1' }
 body: |
-  ; GFX10-LABEL: name: early_term_scc0_gs
-  ; GFX10: bb.0:
-  ; GFX10:   successors: %bb.1(0x80000000)
-  ; GFX10:   liveins: $sgpr0, $sgpr1
-  ; GFX10:   $vgpr0 = V_MOV_B32_e32 0, implicit $exec
-  ; GFX10:   dead $sgpr0 = S_AND_B32 $sgpr0, killed $sgpr1, implicit-def $scc
-  ; GFX10: bb.1:
-  ; GFX10:   liveins: $vgpr0
-  ; GFX10:   S_ENDPGM 0
+  ; GCN-LABEL: name: early_term_scc0_gs
+  ; GCN: bb.0:
+  ; GCN:   successors: %bb.1(0x80000000)
+  ; GCN:   liveins: $sgpr0, $sgpr1
+  ; GCN:   $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+  ; GCN:   dead $sgpr0 = S_AND_B32 $sgpr0, killed $sgpr1, implicit-def $scc
+  ; GCN: bb.1:
+  ; GCN:   liveins: $vgpr0
+  ; GCN:   S_ENDPGM 0
+  ; GCN: bb.2:
+  ; GCN:   $exec = S_MOV_B64 0
+  ; GCN:   S_ENDPGM 0
   bb.0:
     liveins: $sgpr0, $sgpr1
     successors: %bb.1
@@ -184,19 +193,55 @@ liveins:
   - { reg: '$sgpr0' }
   - { reg: '$sgpr1' }
 body: |
-  ; GFX10-LABEL: name: early_term_scc0_cs
-  ; GFX10: bb.0:
-  ; GFX10:   successors: %bb.1(0x80000000), %bb.2(0x00000000)
-  ; GFX10:   liveins: $sgpr0, $sgpr1
-  ; GFX10:   $vgpr0 = V_MOV_B32_e32 0, implicit $exec
-  ; GFX10:   dead $sgpr0 = S_AND_B32 $sgpr0, killed $sgpr1, implicit-def $scc
-  ; GFX10:   S_CBRANCH_SCC0 %bb.2, implicit $scc
-  ; GFX10: bb.1:
-  ; GFX10:   liveins: $vgpr0
-  ; GFX10:   S_ENDPGM 0
-  ; GFX10: bb.2:
-  ; GFX10:   $exec_lo = S_MOV_B32 0
-  ; GFX10:   S_ENDPGM 0
+  ; GCN-LABEL: name: early_term_scc0_cs
+  ; GCN: bb.0:
+  ; GCN:   successors: %bb.1(0x80000000), %bb.2(0x00000000)
+  ; GCN:   liveins: $sgpr0, $sgpr1
+  ; GCN:   $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+  ; GCN:   dead $sgpr0 = S_AND_B32 $sgpr0, killed $sgpr1, implicit-def $scc
+  ; GCN:   S_CBRANCH_SCC0 %bb.2, implicit $scc
+  ; GCN: bb.1:
+  ; GCN:   liveins: $vgpr0
+  ; GCN:   S_ENDPGM 0
+  ; GCN: bb.2:
+  ; GCN:   $exec = S_MOV_B64 0
+  ; GCN:   S_ENDPGM 0
+  bb.0:
+    liveins: $sgpr0, $sgpr1
+    successors: %bb.1
+
+    $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+    dead $sgpr0 = S_AND_B32 $sgpr0, killed $sgpr1, implicit-def $scc
+    SI_EARLY_TERMINATE_SCC0 implicit $scc, implicit $exec
+
+  bb.1:
+    liveins: $vgpr0
+    S_ENDPGM 0
+...
+
+---
+name: early_term_no_export
+tracksRegLiveness: true
+liveins:
+  - { reg: '$sgpr0' }
+  - { reg: '$sgpr1' }
+body: |
+  ; GCN-LABEL: name: early_term_no_export
+  ; GCN: bb.0:
+  ; GCN:   successors: %bb.1(0x80000000), %bb.2(0x00000000)
+  ; GCN:   liveins: $sgpr0, $sgpr1
+  ; GCN:   $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+  ; GCN:   dead $sgpr0 = S_AND_B32 $sgpr0, killed $sgpr1, implicit-def $scc
+  ; GCN:   S_CBRANCH_SCC0 %bb.2, implicit $scc
+  ; GCN: bb.1:
+  ; GCN:   liveins: $vgpr0
+  ; GCN:   EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
+  ; GCN:   S_ENDPGM 0
+  ; GCN: bb.2:
+  ; GCN:   $exec = S_MOV_B64 0
+  ; GFX9:  EXP_DONE 9, undef $vgpr0, undef $vgpr0, undef $vgpr0, undef $vgpr0, 1, 0, 0, implicit $exec
+  ; GFX10-NOT: EXP_DONE
+  ; GCN:   S_ENDPGM 0
   bb.0:
     liveins: $sgpr0, $sgpr1
     successors: %bb.1
@@ -207,5 +252,6 @@ body: |
 
   bb.1:
     liveins: $vgpr0
+    EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
     S_ENDPGM 0
 ...


        


More information about the llvm-commits mailing list