[llvm-branch-commits] [llvm] 790c75c - [AMDGPU] Add SI_EARLY_TERMINATE_SCC0 for early terminating shader
Carl Ritson via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Tue Jan 12 20:34:21 PST 2021
Author: Carl Ritson
Date: 2021-01-13T13:29:05+09:00
New Revision: 790c75c16373d37846c8433a69efd9b0d5e4ad12
URL: https://github.com/llvm/llvm-project/commit/790c75c16373d37846c8433a69efd9b0d5e4ad12
DIFF: https://github.com/llvm/llvm-project/commit/790c75c16373d37846c8433a69efd9b0d5e4ad12.diff
LOG: [AMDGPU] Add SI_EARLY_TERMINATE_SCC0 for early terminating shader
Add a pseudo instruction to allow early termination of a pixel shader
anywhere, based on the value of SCC. The intention is to use this
when a mask of live lanes is updated, e.g. the live lanes in the WQM pass.
This facilitates early termination of shaders even when EXEC is
incomplete, e.g. in non-uniform control flow.
Reviewed By: foad
Differential Revision: https://reviews.llvm.org/D88777
Added:
llvm/test/CodeGen/AMDGPU/early-term.mir
Modified:
llvm/lib/Target/AMDGPU/SIInsertSkips.cpp
llvm/lib/Target/AMDGPU/SIInstructions.td
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp b/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp
index eb2e12f2dcda..e80325bddc43 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertSkips.cpp
@@ -49,6 +49,7 @@ class SIInsertSkips : public MachineFunctionPass {
DebugLoc DL);
bool kill(MachineInstr &MI);
+ void earlyTerm(MachineInstr &MI);
bool skipMaskBranch(MachineInstr &MI, MachineBasicBlock &MBB);
@@ -145,19 +146,22 @@ bool SIInsertSkips::dominatesAllReachable(MachineBasicBlock &MBB) {
return true;
}
-static void generatePsEndPgm(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I, DebugLoc DL,
- const SIInstrInfo *TII) {
- // Generate "null export; s_endpgm".
- BuildMI(MBB, I, DL, TII->get(AMDGPU::EXP_DONE))
- .addImm(AMDGPU::Exp::ET_NULL)
- .addReg(AMDGPU::VGPR0, RegState::Undef)
- .addReg(AMDGPU::VGPR0, RegState::Undef)
- .addReg(AMDGPU::VGPR0, RegState::Undef)
- .addReg(AMDGPU::VGPR0, RegState::Undef)
- .addImm(1) // vm
- .addImm(0) // compr
- .addImm(0); // en
+static void generateEndPgm(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I, DebugLoc DL,
+ const SIInstrInfo *TII, bool IsPS) {
+ // "null export"
+ if (IsPS) {
+ BuildMI(MBB, I, DL, TII->get(AMDGPU::EXP_DONE))
+ .addImm(AMDGPU::Exp::ET_NULL)
+ .addReg(AMDGPU::VGPR0, RegState::Undef)
+ .addReg(AMDGPU::VGPR0, RegState::Undef)
+ .addReg(AMDGPU::VGPR0, RegState::Undef)
+ .addReg(AMDGPU::VGPR0, RegState::Undef)
+ .addImm(1) // vm
+ .addImm(0) // compr
+ .addImm(0); // en
+ }
+ // s_endpgm
BuildMI(MBB, I, DL, TII->get(AMDGPU::S_ENDPGM)).addImm(0);
}
@@ -169,7 +173,9 @@ void SIInsertSkips::ensureEarlyExitBlock(MachineBasicBlock &MBB,
if (!EarlyExitBlock) {
EarlyExitBlock = MF->CreateMachineBasicBlock();
MF->insert(MF->end(), EarlyExitBlock);
- generatePsEndPgm(*EarlyExitBlock, EarlyExitBlock->end(), DL, TII);
+ generateEndPgm(*EarlyExitBlock, EarlyExitBlock->end(), DL, TII,
+ MF->getFunction().getCallingConv() ==
+ CallingConv::AMDGPU_PS);
EarlyExitClearsExec = false;
}
@@ -178,7 +184,6 @@ void SIInsertSkips::ensureEarlyExitBlock(MachineBasicBlock &MBB,
unsigned Mov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
Register Exec = ST.isWave32() ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
auto ExitI = EarlyExitBlock->getFirstNonPHI();
- assert(ExitI->getOpcode() == AMDGPU::EXP_DONE);
BuildMI(*EarlyExitBlock, ExitI, DL, TII->get(Mov), Exec).addImm(0);
EarlyExitClearsExec = true;
}
@@ -224,7 +229,7 @@ void SIInsertSkips::skipIfDead(MachineBasicBlock &MBB,
I == MBB.end() && !llvm::is_contained(MBB.successors(), &*NextBBI);
if (NoSuccessor) {
- generatePsEndPgm(MBB, I, DL, TII);
+ generateEndPgm(MBB, I, DL, TII, true);
} else {
ensureEarlyExitBlock(MBB, false);
@@ -368,6 +373,23 @@ bool SIInsertSkips::kill(MachineInstr &MI) {
}
}
+void SIInsertSkips::earlyTerm(MachineInstr &MI) {
+ MachineBasicBlock &MBB = *MI.getParent();
+ const DebugLoc DL = MI.getDebugLoc();
+
+ ensureEarlyExitBlock(MBB, true);
+
+ auto BranchMI = BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_CBRANCH_SCC0))
+ .addMBB(EarlyExitBlock);
+ auto Next = std::next(MI.getIterator());
+
+ if (Next != MBB.end() && !Next->isTerminator())
+ splitBlock(MBB, *BranchMI, MDT);
+
+ MBB.addSuccessor(EarlyExitBlock);
+ MDT->getBase().insertEdge(&MBB, EarlyExitBlock);
+}
+
// Returns true if a branch over the block was inserted.
bool SIInsertSkips::skipMaskBranch(MachineInstr &MI,
MachineBasicBlock &SrcMBB) {
@@ -393,6 +415,7 @@ bool SIInsertSkips::runOnMachineFunction(MachineFunction &MF) {
SkipThreshold = SkipThresholdFlag;
SmallVector<MachineInstr *, 4> KillInstrs;
+ SmallVector<MachineInstr *, 4> EarlyTermInstrs;
bool MadeChange = false;
for (MachineBasicBlock &MBB : MF) {
@@ -451,18 +474,29 @@ bool SIInsertSkips::runOnMachineFunction(MachineFunction &MF) {
}
break;
+ case AMDGPU::SI_EARLY_TERMINATE_SCC0:
+ EarlyTermInstrs.push_back(&MI);
+ break;
+
default:
break;
}
}
}
+ for (MachineInstr *Instr : EarlyTermInstrs) {
+ // Early termination in GS does nothing
+ if (MF.getFunction().getCallingConv() != CallingConv::AMDGPU_GS)
+ earlyTerm(*Instr);
+ Instr->eraseFromParent();
+ }
for (MachineInstr *Kill : KillInstrs) {
skipIfDead(*Kill->getParent(), std::next(Kill->getIterator()),
Kill->getDebugLoc());
Kill->eraseFromParent();
}
KillInstrs.clear();
+ EarlyTermInstrs.clear();
EarlyExitBlock = nullptr;
return MadeChange;
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 82b1039002fe..43bd80c62d80 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -321,6 +321,14 @@ def SI_IF_BREAK : CFPseudoInstSI <
let isReMaterializable = 1;
}
+// Branch to the early termination block of the shader if SCC is 0.
+// This uses SCC from a previous SALU operation, i.e. the update of
+// a mask of live lanes after a kill/demote operation.
+// Only valid in pixel shaders.
+def SI_EARLY_TERMINATE_SCC0 : SPseudoInstSI <(outs), (ins)> {
+ let Uses = [EXEC,SCC];
+}
+
let Uses = [EXEC] in {
multiclass PseudoInstKill <dag ins> {
diff --git a/llvm/test/CodeGen/AMDGPU/early-term.mir b/llvm/test/CodeGen/AMDGPU/early-term.mir
new file mode 100644
index 000000000000..361a4ebefcc6
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/early-term.mir
@@ -0,0 +1,268 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=si-insert-skips -verify-machineinstrs %s -o - | FileCheck %s
+
+--- |
+ define amdgpu_ps void @early_term_scc0_end_block() {
+ ret void
+ }
+
+ define amdgpu_ps void @early_term_scc0_next_terminator() {
+ ret void
+ }
+
+ define amdgpu_ps void @early_term_scc0_in_block() {
+ ret void
+ }
+
+ define amdgpu_ps void @early_term_scc0_with_kill() {
+ ret void
+ }
+
+ define amdgpu_gs void @early_term_scc0_gs() {
+ ret void
+ }
+
+ define amdgpu_cs void @early_term_scc0_cs() {
+ ret void
+ }
+...
+
+---
+name: early_term_scc0_end_block
+tracksRegLiveness: true
+liveins:
+ - { reg: '$sgpr0' }
+ - { reg: '$sgpr1' }
+body: |
+ ; CHECK-LABEL: name: early_term_scc0_end_block
+ ; CHECK: bb.0:
+ ; CHECK: successors: %bb.1(0x80000000), %bb.2(0x00000000)
+ ; CHECK: liveins: $sgpr0, $sgpr1
+ ; CHECK: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ ; CHECK: dead $sgpr0 = S_AND_B32 $sgpr0, killed $sgpr1, implicit-def $scc
+ ; CHECK: S_CBRANCH_SCC0 %bb.2, implicit $scc
+ ; CHECK: bb.1:
+ ; CHECK: liveins: $vgpr0
+ ; CHECK: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
+ ; CHECK: S_ENDPGM 0
+ ; CHECK: bb.2:
+ ; CHECK: $exec_lo = S_MOV_B32 0
+ ; CHECK: EXP_DONE 9, undef $vgpr0, undef $vgpr0, undef $vgpr0, undef $vgpr0, 1, 0, 0, implicit $exec
+ ; CHECK: S_ENDPGM 0
+ bb.0:
+ liveins: $sgpr0, $sgpr1
+ successors: %bb.1
+
+ $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ dead $sgpr0 = S_AND_B32 $sgpr0, killed $sgpr1, implicit-def $scc
+ SI_EARLY_TERMINATE_SCC0 implicit $scc, implicit $exec
+
+ bb.1:
+ liveins: $vgpr0
+ EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
+ S_ENDPGM 0
+...
+
+---
+name: early_term_scc0_next_terminator
+tracksRegLiveness: true
+liveins:
+ - { reg: '$sgpr0' }
+ - { reg: '$sgpr1' }
+body: |
+ ; CHECK-LABEL: name: early_term_scc0_next_terminator
+ ; CHECK: bb.0:
+ ; CHECK: successors: %bb.2(0x80000000), %bb.3(0x00000000)
+ ; CHECK: liveins: $sgpr0, $sgpr1
+ ; CHECK: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ ; CHECK: dead $sgpr0 = S_AND_B32 $sgpr0, killed $sgpr1, implicit-def $scc
+ ; CHECK: S_CBRANCH_SCC0 %bb.3, implicit $scc
+ ; CHECK: S_BRANCH %bb.2
+ ; CHECK: bb.1:
+ ; CHECK: successors: %bb.2(0x80000000)
+ ; CHECK: $vgpr0 = V_MOV_B32_e32 1, implicit $exec
+ ; CHECK: bb.2:
+ ; CHECK: liveins: $vgpr0
+ ; CHECK: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
+ ; CHECK: S_ENDPGM 0
+ ; CHECK: bb.3:
+ ; CHECK: $exec_lo = S_MOV_B32 0
+ ; CHECK: EXP_DONE 9, undef $vgpr0, undef $vgpr0, undef $vgpr0, undef $vgpr0, 1, 0, 0, implicit $exec
+ ; CHECK: S_ENDPGM 0
+ bb.0:
+ liveins: $sgpr0, $sgpr1
+ successors: %bb.2
+
+ $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ dead $sgpr0 = S_AND_B32 $sgpr0, killed $sgpr1, implicit-def $scc
+ SI_EARLY_TERMINATE_SCC0 implicit $scc, implicit $exec
+ S_BRANCH %bb.2
+
+ bb.1:
+ successors: %bb.2
+ $vgpr0 = V_MOV_B32_e32 1, implicit $exec
+ S_BRANCH %bb.2
+
+ bb.2:
+ liveins: $vgpr0
+ EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
+ S_ENDPGM 0
+...
+
+---
+name: early_term_scc0_in_block
+tracksRegLiveness: true
+liveins:
+ - { reg: '$sgpr0' }
+ - { reg: '$sgpr1' }
+body: |
+ ; CHECK-LABEL: name: early_term_scc0_in_block
+ ; CHECK: bb.0:
+ ; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000)
+ ; CHECK: liveins: $sgpr0, $sgpr1
+ ; CHECK: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ ; CHECK: dead $sgpr0 = S_AND_B32 $sgpr0, killed $sgpr1, implicit-def $scc
+ ; CHECK: S_CBRANCH_SCC0 %bb.2, implicit $scc
+ ; CHECK: bb.3:
+ ; CHECK: successors: %bb.1(0x80000000)
+ ; CHECK: liveins: $vgpr0, $scc
+ ; CHECK: $vgpr1 = V_MOV_B32_e32 1, implicit $exec
+ ; CHECK: bb.1:
+ ; CHECK: liveins: $vgpr0, $vgpr1
+ ; CHECK: EXP 1, $vgpr1, $vgpr1, $vgpr1, $vgpr1, -1, -1, 15, implicit $exec
+ ; CHECK: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
+ ; CHECK: S_ENDPGM 0
+ ; CHECK: bb.2:
+ ; CHECK: $exec_lo = S_MOV_B32 0
+ ; CHECK: EXP_DONE 9, undef $vgpr0, undef $vgpr0, undef $vgpr0, undef $vgpr0, 1, 0, 0, implicit $exec
+ ; CHECK: S_ENDPGM 0
+ bb.0:
+ liveins: $sgpr0, $sgpr1
+ successors: %bb.1
+
+ $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ dead $sgpr0 = S_AND_B32 $sgpr0, killed $sgpr1, implicit-def $scc
+ SI_EARLY_TERMINATE_SCC0 implicit $scc, implicit $exec
+ $vgpr1 = V_MOV_B32_e32 1, implicit $exec
+
+ bb.1:
+ liveins: $vgpr0, $vgpr1
+ EXP 1, $vgpr1, $vgpr1, $vgpr1, $vgpr1, -1, -1, 15, implicit $exec
+ EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
+ S_ENDPGM 0
+...
+
+---
+name: early_term_scc0_with_kill
+tracksRegLiveness: true
+liveins:
+ - { reg: '$sgpr0' }
+ - { reg: '$sgpr1' }
+ - { reg: '$vgpr2' }
+body: |
+ ; CHECK-LABEL: name: early_term_scc0_with_kill
+ ; CHECK: bb.0:
+ ; CHECK: successors: %bb.1(0x80000000), %bb.3(0x00000000)
+ ; CHECK: liveins: $sgpr0, $sgpr1, $vgpr2
+ ; CHECK: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ ; CHECK: V_CMPX_LE_F32_nosdst_e32 0, killed $vgpr2, implicit-def $exec, implicit $mode, implicit $exec
+ ; CHECK: S_CBRANCH_EXECZ %bb.3, implicit $exec
+ ; CHECK: bb.1:
+ ; CHECK: successors: %bb.4(0x40000000), %bb.3(0x40000000)
+ ; CHECK: liveins: $sgpr0, $sgpr1, $vgpr0
+ ; CHECK: dead $sgpr0 = S_AND_B32 $sgpr0, killed $sgpr1, implicit-def $scc
+ ; CHECK: S_CBRANCH_SCC0 %bb.3, implicit $scc
+ ; CHECK: bb.4:
+ ; CHECK: successors: %bb.2(0x80000000)
+ ; CHECK: liveins: $vgpr0, $scc
+ ; CHECK: $vgpr1 = V_MOV_B32_e32 1, implicit $exec
+ ; CHECK: bb.2:
+ ; CHECK: liveins: $vgpr0, $vgpr1
+ ; CHECK: EXP 1, $vgpr1, $vgpr1, $vgpr1, $vgpr1, -1, -1, 15, implicit $exec
+ ; CHECK: EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
+ ; CHECK: S_ENDPGM 0
+ ; CHECK: bb.3:
+ ; CHECK: $exec_lo = S_MOV_B32 0
+ ; CHECK: EXP_DONE 9, undef $vgpr0, undef $vgpr0, undef $vgpr0, undef $vgpr0, 1, 0, 0, implicit $exec
+ ; CHECK: S_ENDPGM 0
+ bb.0:
+ liveins: $sgpr0, $sgpr1, $vgpr2
+ successors: %bb.1
+ $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ SI_KILL_F32_COND_IMM_TERMINATOR killed $vgpr2, 0, 3, implicit-def $exec, implicit-def $vcc, implicit-def $scc, implicit $exec
+
+ bb.1:
+ liveins: $sgpr0, $sgpr1, $vgpr0
+ successors: %bb.2
+ dead $sgpr0 = S_AND_B32 $sgpr0, killed $sgpr1, implicit-def $scc
+ SI_EARLY_TERMINATE_SCC0 implicit $scc, implicit $exec
+ $vgpr1 = V_MOV_B32_e32 1, implicit $exec
+
+ bb.2:
+ liveins: $vgpr0, $vgpr1
+ EXP 1, $vgpr1, $vgpr1, $vgpr1, $vgpr1, -1, -1, 15, implicit $exec
+ EXP_DONE 0, $vgpr0, $vgpr0, $vgpr0, $vgpr0, -1, -1, 15, implicit $exec
+ S_ENDPGM 0
+...
+
+---
+name: early_term_scc0_gs
+tracksRegLiveness: true
+liveins:
+ - { reg: '$sgpr0' }
+ - { reg: '$sgpr1' }
+body: |
+ ; CHECK-LABEL: name: early_term_scc0_gs
+ ; CHECK: bb.0:
+ ; CHECK: successors: %bb.1(0x80000000)
+ ; CHECK: liveins: $sgpr0, $sgpr1
+ ; CHECK: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ ; CHECK: dead $sgpr0 = S_AND_B32 $sgpr0, killed $sgpr1, implicit-def $scc
+ ; CHECK: bb.1:
+ ; CHECK: liveins: $vgpr0
+ ; CHECK: S_ENDPGM 0
+ bb.0:
+ liveins: $sgpr0, $sgpr1
+ successors: %bb.1
+
+ $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ dead $sgpr0 = S_AND_B32 $sgpr0, killed $sgpr1, implicit-def $scc
+ SI_EARLY_TERMINATE_SCC0 implicit $scc, implicit $exec
+
+ bb.1:
+ liveins: $vgpr0
+ S_ENDPGM 0
+...
+
+---
+name: early_term_scc0_cs
+tracksRegLiveness: true
+liveins:
+ - { reg: '$sgpr0' }
+ - { reg: '$sgpr1' }
+body: |
+ ; CHECK-LABEL: name: early_term_scc0_cs
+ ; CHECK: bb.0:
+ ; CHECK: successors: %bb.1(0x80000000), %bb.2(0x00000000)
+ ; CHECK: liveins: $sgpr0, $sgpr1
+ ; CHECK: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ ; CHECK: dead $sgpr0 = S_AND_B32 $sgpr0, killed $sgpr1, implicit-def $scc
+ ; CHECK: S_CBRANCH_SCC0 %bb.2, implicit $scc
+ ; CHECK: bb.1:
+ ; CHECK: liveins: $vgpr0
+ ; CHECK: S_ENDPGM 0
+ ; CHECK: bb.2:
+ ; CHECK: $exec_lo = S_MOV_B32 0
+ ; CHECK: S_ENDPGM 0
+ bb.0:
+ liveins: $sgpr0, $sgpr1
+ successors: %bb.1
+
+ $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ dead $sgpr0 = S_AND_B32 $sgpr0, killed $sgpr1, implicit-def $scc
+ SI_EARLY_TERMINATE_SCC0 implicit $scc, implicit $exec
+
+ bb.1:
+ liveins: $vgpr0
+ S_ENDPGM 0
+...
More information about the llvm-branch-commits mailing list