[llvm] 9dcd75f - [AMDGPU] Allow frontends to disable null export for pixel shaders
Carl Ritson via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 21 18:21:18 PDT 2021
Author: Carl Ritson
Date: 2021-07-22T10:20:46+09:00
New Revision: 9dcd75f86f2457b31a8eca4fa47f28d2b912dec4
URL: https://github.com/llvm/llvm-project/commit/9dcd75f86f2457b31a8eca4fa47f28d2b912dec4
DIFF: https://github.com/llvm/llvm-project/commit/9dcd75f86f2457b31a8eca4fa47f28d2b912dec4.diff
LOG: [AMDGPU] Allow frontends to disable null export for pixel shaders
Disable null export (for kills) when a frontend defines a pixel
shader as not exporting using amdgpu-color-export and
amdgpu-depth-export function attributes.
This allows the generation of export-free pixel shaders.
Reviewed By: foad
Differential Revision: https://reviews.llvm.org/D105683
Added:
Modified:
llvm/lib/Target/AMDGPU/SILateBranchLowering.cpp
llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
llvm/test/CodeGen/AMDGPU/early-term.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SILateBranchLowering.cpp b/llvm/lib/Target/AMDGPU/SILateBranchLowering.cpp
index 3fb96f15313d4..d560b477b8ba7 100644
--- a/llvm/lib/Target/AMDGPU/SILateBranchLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SILateBranchLowering.cpp
@@ -67,9 +67,19 @@ char &llvm::SILateBranchLoweringPassID = SILateBranchLowering::ID;
static void generateEndPgm(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I, DebugLoc DL,
- const SIInstrInfo *TII, bool IsPS) {
- // "null export"
- if (IsPS) {
+ const SIInstrInfo *TII, MachineFunction &MF) {
+ const Function &F = MF.getFunction();
+ bool IsPS = F.getCallingConv() == CallingConv::AMDGPU_PS;
+
+ // Check if hardware has been configured to expect color or depth exports.
+ bool HasExports =
+ AMDGPU::getHasColorExport(F) || AMDGPU::getHasDepthExport(F);
+
+ // Prior to GFX10, hardware always expects at least one export for PS.
+ bool MustExport = !AMDGPU::isGFX10Plus(TII->getSubtarget());
+
+ if (IsPS && (HasExports || MustExport)) {
+ // Generate "null export" if hardware is expecting PS to export.
BuildMI(MBB, I, DL, TII->get(AMDGPU::EXP_DONE))
.addImm(AMDGPU::Exp::ET_NULL)
.addReg(AMDGPU::VGPR0, RegState::Undef)
@@ -80,6 +90,7 @@ static void generateEndPgm(MachineBasicBlock &MBB,
.addImm(0) // compr
.addImm(0); // en
}
+
// s_endpgm
BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ENDPGM)).addImm(0);
}
@@ -168,8 +179,7 @@ bool SILateBranchLowering::runOnMachineFunction(MachineFunction &MF) {
BuildMI(*EarlyExitBlock, EarlyExitBlock->end(), DL, TII->get(MovOpc),
ExecReg)
.addImm(0);
- generateEndPgm(*EarlyExitBlock, EarlyExitBlock->end(), DL, TII,
- MF.getFunction().getCallingConv() == CallingConv::AMDGPU_PS);
+ generateEndPgm(*EarlyExitBlock, EarlyExitBlock->end(), DL, TII, MF);
for (MachineInstr *Instr : EarlyTermInstrs) {
// Early termination in GS does nothing
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 8cd748eaec6b4..29bbf50cbfdc3 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -1344,6 +1344,17 @@ unsigned getInitialPSInputAddr(const Function &F) {
return getIntegerAttribute(F, "InitialPSInputAddr", 0);
}
+bool getHasColorExport(const Function &F) {
+ // As a safe default always respond as if PS has color exports.
+ return getIntegerAttribute(
+ F, "amdgpu-color-export",
+ F.getCallingConv() == CallingConv::AMDGPU_PS ? 1 : 0) != 0;
+}
+
+bool getHasDepthExport(const Function &F) {
+ return getIntegerAttribute(F, "amdgpu-depth-export", 0) != 0;
+}
+
bool isShader(CallingConv::ID cc) {
switch(cc) {
case CallingConv::AMDGPU_VS:
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index 40344c0281a2f..72c872dec5ba7 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -689,6 +689,10 @@ uint64_t encodeMsg(uint64_t MsgId,
unsigned getInitialPSInputAddr(const Function &F);
+bool getHasColorExport(const Function &F);
+
+bool getHasDepthExport(const Function &F);
+
LLVM_READNONE
bool isShader(CallingConv::ID CC);
diff --git a/llvm/test/CodeGen/AMDGPU/early-term.mir b/llvm/test/CodeGen/AMDGPU/early-term.mir
index ffe56c4085fdd..a8a245b45af6d 100644
--- a/llvm/test/CodeGen/AMDGPU/early-term.mir
+++ b/llvm/test/CodeGen/AMDGPU/early-term.mir
@@ -1,5 +1,5 @@
-# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=si-late-branch-lowering -verify-machineinstrs %s -o - | FileCheck -check-prefix=GFX10 %s
+# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=si-late-branch-lowering -verify-machineinstrs %s -o - | FileCheck -check-prefixes=GCN,GFX9 %s
+# RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -run-pass=si-late-branch-lowering -verify-machineinstrs %s -o - | FileCheck -check-prefixes=GCN,GFX10 %s
--- |
define amdgpu_ps void @early_term_scc0_end_block() {
@@ -21,6 +21,12 @@
define amdgpu_cs void @early_term_scc0_cs() {
ret void
}
+
+ define amdgpu_ps void @early_term_no_export() #0 {
+ ret void
+ }
+
+ attributes #0 = { "amdgpu-color-export"="0" "amdgpu-depth-export"="0" }
...
---
@@ -30,21 +36,21 @@ liveins:
- { reg: '$sgpr0' }
- { reg: '$sgpr1' }
body: |
- ; GFX10-LABEL: name: early_term_scc0_end_block
- ; GFX10: bb.0:
- ; GFX10: successors: %bb.1(0x80000000), %bb.2(0x00000000)
- ; GFX10: liveins: $sgpr0, $sgpr1
- ; GFX10: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
- ; GFX10: dead $sgpr0 = S_AND_B32 $sgpr0, killed $sgpr1, implicit-def $scc
- ; GFX10: S_CBRANCH_SCC0 %bb.2, implicit $scc
- ; GFX10: bb.1:
- ; GFX10: liveins: $vgpr0
- ; GFX10: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
- ; GFX10: S_ENDPGM 0
- ; GFX10: bb.2:
- ; GFX10: $exec_lo = S_MOV_B32 0
- ; GFX10: EXP_DONE 9, undef $vgpr0, undef $vgpr0, undef $vgpr0, undef $vgpr0, 1, 0, 0, implicit $exec
- ; GFX10: S_ENDPGM 0
+ ; GCN-LABEL: name: early_term_scc0_end_block
+ ; GCN: bb.0:
+ ; GCN: successors: %bb.1(0x80000000), %bb.2(0x00000000)
+ ; GCN: liveins: $sgpr0, $sgpr1
+ ; GCN: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN: dead $sgpr0 = S_AND_B32 $sgpr0, killed $sgpr1, implicit-def $scc
+ ; GCN: S_CBRANCH_SCC0 %bb.2, implicit $scc
+ ; GCN: bb.1:
+ ; GCN: liveins: $vgpr0
+ ; GCN: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
+ ; GCN: S_ENDPGM 0
+ ; GCN: bb.2:
+ ; GCN: $exec = S_MOV_B64 0
+ ; GCN: EXP_DONE 9, undef $vgpr0, undef $vgpr0, undef $vgpr0, undef $vgpr0, 1, 0, 0, implicit $exec
+ ; GCN: S_ENDPGM 0
bb.0:
liveins: $sgpr0, $sgpr1
successors: %bb.1
@@ -66,25 +72,25 @@ liveins:
- { reg: '$sgpr0' }
- { reg: '$sgpr1' }
body: |
- ; GFX10-LABEL: name: early_term_scc0_next_terminator
- ; GFX10: bb.0:
- ; GFX10: successors: %bb.2(0x80000000), %bb.3(0x00000000)
- ; GFX10: liveins: $sgpr0, $sgpr1
- ; GFX10: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
- ; GFX10: dead $sgpr0 = S_AND_B32 $sgpr0, killed $sgpr1, implicit-def $scc
- ; GFX10: S_CBRANCH_SCC0 %bb.3, implicit $scc
- ; GFX10: S_BRANCH %bb.2
- ; GFX10: bb.1:
- ; GFX10: successors: %bb.2(0x80000000)
- ; GFX10: $vgpr0 = V_MOV_B32_e32 1, implicit $exec
- ; GFX10: bb.2:
- ; GFX10: liveins: $vgpr0
- ; GFX10: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
- ; GFX10: S_ENDPGM 0
- ; GFX10: bb.3:
- ; GFX10: $exec_lo = S_MOV_B32 0
- ; GFX10: EXP_DONE 9, undef $vgpr0, undef $vgpr0, undef $vgpr0, undef $vgpr0, 1, 0, 0, implicit $exec
- ; GFX10: S_ENDPGM 0
+ ; GCN-LABEL: name: early_term_scc0_next_terminator
+ ; GCN: bb.0:
+ ; GCN: successors: %bb.2(0x80000000), %bb.3(0x00000000)
+ ; GCN: liveins: $sgpr0, $sgpr1
+ ; GCN: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN: dead $sgpr0 = S_AND_B32 $sgpr0, killed $sgpr1, implicit-def $scc
+ ; GCN: S_CBRANCH_SCC0 %bb.3, implicit $scc
+ ; GCN: S_BRANCH %bb.2
+ ; GCN: bb.1:
+ ; GCN: successors: %bb.2(0x80000000)
+ ; GCN: $vgpr0 = V_MOV_B32_e32 1, implicit $exec
+ ; GCN: bb.2:
+ ; GCN: liveins: $vgpr0
+ ; GCN: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
+ ; GCN: S_ENDPGM 0
+ ; GCN: bb.3:
+ ; GCN: $exec = S_MOV_B64 0
+ ; GCN: EXP_DONE 9, undef $vgpr0, undef $vgpr0, undef $vgpr0, undef $vgpr0, 1, 0, 0, implicit $exec
+ ; GCN: S_ENDPGM 0
bb.0:
liveins: $sgpr0, $sgpr1
successors: %bb.2
@@ -112,26 +118,26 @@ liveins:
- { reg: '$sgpr0' }
- { reg: '$sgpr1' }
body: |
- ; GFX10-LABEL: name: early_term_scc0_in_block
- ; GFX10: bb.0:
- ; GFX10: successors: %bb.3(0x40000000), %bb.2(0x40000000)
- ; GFX10: liveins: $sgpr0, $sgpr1
- ; GFX10: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
- ; GFX10: dead $sgpr0 = S_AND_B32 $sgpr0, killed $sgpr1, implicit-def $scc
- ; GFX10: S_CBRANCH_SCC0 %bb.2, implicit $scc
- ; GFX10: bb.3:
- ; GFX10: successors: %bb.1(0x80000000)
- ; GFX10: liveins: $vgpr0, $scc
- ; GFX10: $vgpr1 = V_MOV_B32_e32 1, implicit $exec
- ; GFX10: bb.1:
- ; GFX10: liveins: $vgpr0, $vgpr1
- ; GFX10: EXP 1, $vgpr1, $vgpr1, $vgpr1, $vgpr1, -1, -1, 15, implicit $exec
- ; GFX10: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
- ; GFX10: S_ENDPGM 0
- ; GFX10: bb.2:
- ; GFX10: $exec_lo = S_MOV_B32 0
- ; GFX10: EXP_DONE 9, undef $vgpr0, undef $vgpr0, undef $vgpr0, undef $vgpr0, 1, 0, 0, implicit $exec
- ; GFX10: S_ENDPGM 0
+ ; GCN-LABEL: name: early_term_scc0_in_block
+ ; GCN: bb.0:
+ ; GCN: successors: %bb.3(0x40000000), %bb.2(0x40000000)
+ ; GCN: liveins: $sgpr0, $sgpr1
+ ; GCN: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN: dead $sgpr0 = S_AND_B32 $sgpr0, killed $sgpr1, implicit-def $scc
+ ; GCN: S_CBRANCH_SCC0 %bb.2, implicit $scc
+ ; GCN: bb.3:
+ ; GCN: successors: %bb.1(0x80000000)
+ ; GCN: liveins: $vgpr0, $scc
+ ; GCN: $vgpr1 = V_MOV_B32_e32 1, implicit $exec
+ ; GCN: bb.1:
+ ; GCN: liveins: $vgpr0, $vgpr1
+ ; GCN: EXP 1, $vgpr1, $vgpr1, $vgpr1, $vgpr1, -1, -1, 15, implicit $exec
+ ; GCN: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
+ ; GCN: S_ENDPGM 0
+ ; GCN: bb.2:
+ ; GCN: $exec = S_MOV_B64 0
+ ; GCN: EXP_DONE 9, undef $vgpr0, undef $vgpr0, undef $vgpr0, undef $vgpr0, 1, 0, 0, implicit $exec
+ ; GCN: S_ENDPGM 0
bb.0:
liveins: $sgpr0, $sgpr1
successors: %bb.1
@@ -155,15 +161,18 @@ liveins:
- { reg: '$sgpr0' }
- { reg: '$sgpr1' }
body: |
- ; GFX10-LABEL: name: early_term_scc0_gs
- ; GFX10: bb.0:
- ; GFX10: successors: %bb.1(0x80000000)
- ; GFX10: liveins: $sgpr0, $sgpr1
- ; GFX10: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
- ; GFX10: dead $sgpr0 = S_AND_B32 $sgpr0, killed $sgpr1, implicit-def $scc
- ; GFX10: bb.1:
- ; GFX10: liveins: $vgpr0
- ; GFX10: S_ENDPGM 0
+ ; GCN-LABEL: name: early_term_scc0_gs
+ ; GCN: bb.0:
+ ; GCN: successors: %bb.1(0x80000000)
+ ; GCN: liveins: $sgpr0, $sgpr1
+ ; GCN: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN: dead $sgpr0 = S_AND_B32 $sgpr0, killed $sgpr1, implicit-def $scc
+ ; GCN: bb.1:
+ ; GCN: liveins: $vgpr0
+ ; GCN: S_ENDPGM 0
+ ; GCN: bb.2:
+ ; GCN: $exec = S_MOV_B64 0
+ ; GCN: S_ENDPGM 0
bb.0:
liveins: $sgpr0, $sgpr1
successors: %bb.1
@@ -184,19 +193,55 @@ liveins:
- { reg: '$sgpr0' }
- { reg: '$sgpr1' }
body: |
- ; GFX10-LABEL: name: early_term_scc0_cs
- ; GFX10: bb.0:
- ; GFX10: successors: %bb.1(0x80000000), %bb.2(0x00000000)
- ; GFX10: liveins: $sgpr0, $sgpr1
- ; GFX10: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
- ; GFX10: dead $sgpr0 = S_AND_B32 $sgpr0, killed $sgpr1, implicit-def $scc
- ; GFX10: S_CBRANCH_SCC0 %bb.2, implicit $scc
- ; GFX10: bb.1:
- ; GFX10: liveins: $vgpr0
- ; GFX10: S_ENDPGM 0
- ; GFX10: bb.2:
- ; GFX10: $exec_lo = S_MOV_B32 0
- ; GFX10: S_ENDPGM 0
+ ; GCN-LABEL: name: early_term_scc0_cs
+ ; GCN: bb.0:
+ ; GCN: successors: %bb.1(0x80000000), %bb.2(0x00000000)
+ ; GCN: liveins: $sgpr0, $sgpr1
+ ; GCN: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN: dead $sgpr0 = S_AND_B32 $sgpr0, killed $sgpr1, implicit-def $scc
+ ; GCN: S_CBRANCH_SCC0 %bb.2, implicit $scc
+ ; GCN: bb.1:
+ ; GCN: liveins: $vgpr0
+ ; GCN: S_ENDPGM 0
+ ; GCN: bb.2:
+ ; GCN: $exec = S_MOV_B64 0
+ ; GCN: S_ENDPGM 0
+ bb.0:
+ liveins: $sgpr0, $sgpr1
+ successors: %bb.1
+
+ $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ dead $sgpr0 = S_AND_B32 $sgpr0, killed $sgpr1, implicit-def $scc
+ SI_EARLY_TERMINATE_SCC0 implicit $scc, implicit $exec
+
+ bb.1:
+ liveins: $vgpr0
+ S_ENDPGM 0
+...
+
+---
+name: early_term_no_export
+tracksRegLiveness: true
+liveins:
+ - { reg: '$sgpr0' }
+ - { reg: '$sgpr1' }
+body: |
+ ; GCN-LABEL: name: early_term_no_export
+ ; GCN: bb.0:
+ ; GCN: successors: %bb.1(0x80000000), %bb.2(0x00000000)
+ ; GCN: liveins: $sgpr0, $sgpr1
+ ; GCN: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN: dead $sgpr0 = S_AND_B32 $sgpr0, killed $sgpr1, implicit-def $scc
+ ; GCN: S_CBRANCH_SCC0 %bb.2, implicit $scc
+ ; GCN: bb.1:
+ ; GCN: liveins: $vgpr0
+ ; GCN: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
+ ; GCN: S_ENDPGM 0
+ ; GCN: bb.2:
+ ; GCN: $exec = S_MOV_B64 0
+ ; GFX9: EXP_DONE 9, undef $vgpr0, undef $vgpr0, undef $vgpr0, undef $vgpr0, 1, 0, 0, implicit $exec
+ ; GFX10-NOT: EXP_DONE
+ ; GCN: S_ENDPGM 0
bb.0:
liveins: $sgpr0, $sgpr1
successors: %bb.1
@@ -207,5 +252,6 @@ body: |
bb.1:
liveins: $vgpr0
+ EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
S_ENDPGM 0
...
More information about the llvm-commits
mailing list