[llvm] a1fb307 - [AMDGPU] Allow hoisting of some VALU compare instructions
Carl Ritson via llvm-commits
llvm-commits at lists.llvm.org
Mon Feb 7 18:28:02 PST 2022
Author: Carl Ritson
Date: 2022-02-08T11:27:23+09:00
New Revision: a1fb307b4b8d5ffd8f66a477783e362e54a3c03c
URL: https://github.com/llvm/llvm-project/commit/a1fb307b4b8d5ffd8f66a477783e362e54a3c03c
DIFF: https://github.com/llvm/llvm-project/commit/a1fb307b4b8d5ffd8f66a477783e362e54a3c03c.diff
LOG: [AMDGPU] Allow hoisting of some VALU compare instructions
Conservatively allow hoisting/sinking of VALU comparisons.
If the result of a comparison is masked with exec, narrowing the
set of active lanes, then it is safe to hoist it as the masking
instruction will never be hoisted.
Heuristically this is also true for sinking, as we do not expect
the result of a sunk comparison that is masked with exec to be
used outside of the loop.
Reviewed By: rampitec
Differential Revision: https://reviews.llvm.org/D118975
Added:
Modified:
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
llvm/test/CodeGen/AMDGPU/licm-valu.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 972bef2fe4305..b4f6f90f8d436 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -130,9 +130,31 @@ bool SIInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI,
return false;
}
-static bool readsExecAsData(const MachineInstr &MI) {
- if (MI.isCompare())
- return true;
+// Returns true if the scalar result of a VALU instruction depends on exec.
+static bool resultDependsOnExec(const MachineInstr &MI) {
+ // Ignore comparisons which are only used masked with exec.
+ // This allows some hoisting/sinking of VALU comparisons.
+ if (MI.isCompare()) {
+ const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
+ Register DstReg = MI.getOperand(0).getReg();
+ if (!DstReg.isVirtual())
+ return true;
+ for (MachineInstr &Use : MRI.use_nodbg_instructions(DstReg)) {
+ switch (Use.getOpcode()) {
+ case AMDGPU::S_AND_SAVEEXEC_B32:
+ case AMDGPU::S_AND_SAVEEXEC_B64:
+ break;
+ case AMDGPU::S_AND_B32:
+ case AMDGPU::S_AND_B64:
+ if (!Use.readsRegister(AMDGPU::EXEC))
+ return true;
+ break;
+ default:
+ return true;
+ }
+ }
+ return false;
+ }
switch (MI.getOpcode()) {
default:
@@ -147,7 +169,7 @@ static bool readsExecAsData(const MachineInstr &MI) {
bool SIInstrInfo::isIgnorableUse(const MachineOperand &MO) const {
// Any implicit use of exec by VALU is not a real register read.
return MO.getReg() == AMDGPU::EXEC && MO.isImplicit() &&
- isVALU(*MO.getParent()) && !readsExecAsData(*MO.getParent());
+ isVALU(*MO.getParent()) && !resultDependsOnExec(*MO.getParent());
}
bool SIInstrInfo::areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1,
diff --git a/llvm/test/CodeGen/AMDGPU/licm-valu.mir b/llvm/test/CodeGen/AMDGPU/licm-valu.mir
index 0bf2c7c2bc3ba..00a5a4f1b32ea 100644
--- a/llvm/test/CodeGen/AMDGPU/licm-valu.mir
+++ b/llvm/test/CodeGen/AMDGPU/licm-valu.mir
@@ -47,7 +47,7 @@ body: |
; GCN-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 1, 2, implicit $exec
- ; GCN-NEXT: $exec = S_OR_B64 $exec, 1, implicit-def $scc
+ ; GCN-NEXT: $exec = S_OR_B64 $exec, [[V_CMP_EQ_U32_e64_]], implicit-def $scc
; GCN-NEXT: S_CBRANCH_EXECNZ %bb.1, implicit $exec
; GCN-NEXT: S_BRANCH %bb.2
; GCN-NEXT: {{ $}}
@@ -58,7 +58,39 @@ body: |
bb.1:
%0:sreg_64 = V_CMP_EQ_U32_e64 1, 2, implicit $exec
- $exec = S_OR_B64 $exec, 1, implicit-def $scc
+ $exec = S_OR_B64 $exec, %0:sreg_64, implicit-def $scc
+ S_CBRANCH_EXECNZ %bb.1, implicit $exec
+ S_BRANCH %bb.2
+
+ bb.2:
+ S_ENDPGM 0
+...
+---
+name: allowable_hoist_cmp
+tracksRegLiveness: true
+body: |
+ ; GCN-LABEL: name: allowable_hoist_cmp
+ ; GCN: bb.0:
+ ; GCN-NEXT: successors: %bb.1(0x80000000)
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 1, 2, implicit $exec
+ ; GCN-NEXT: S_BRANCH %bb.1
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.1:
+ ; GCN-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: $exec = S_AND_B64 $exec, [[V_CMP_EQ_U32_e64_]], implicit-def $scc
+ ; GCN-NEXT: S_CBRANCH_EXECNZ %bb.1, implicit $exec
+ ; GCN-NEXT: S_BRANCH %bb.2
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: bb.2:
+ ; GCN-NEXT: S_ENDPGM 0
+ bb.0:
+ S_BRANCH %bb.1
+
+ bb.1:
+ %0:sreg_64 = V_CMP_EQ_U32_e64 1, 2, implicit $exec
+ $exec = S_AND_B64 $exec, %0:sreg_64, implicit-def $scc
S_CBRANCH_EXECNZ %bb.1, implicit $exec
S_BRANCH %bb.2
More information about the llvm-commits
mailing list