[llvm] 76b7d34 - [AMDGPU] Add TII::isIgnorableUse() to allow VOP rematerialization
Stanislav Mekhanoshin via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 14 13:04:08 PDT 2021
Author: Stanislav Mekhanoshin
Date: 2021-07-14T13:03:58-07:00
New Revision: 76b7d3432e38bb7690c3bbd4940786b5cb751b95
URL: https://github.com/llvm/llvm-project/commit/76b7d3432e38bb7690c3bbd4940786b5cb751b95
DIFF: https://github.com/llvm/llvm-project/commit/76b7d3432e38bb7690c3bbd4940786b5cb751b95.diff
LOG: [AMDGPU] Add TII::isIgnorableUse() to allow VOP rematerialization
Any def of EXEC prevents rematerialization of any VOP instruction
because of the physreg use. Create a callback to check if the
physreg use can be ignored to allow rematerialization.
Differential Revision: https://reviews.llvm.org/D105836
Added:
Modified:
llvm/include/llvm/CodeGen/TargetInstrInfo.h
llvm/lib/CodeGen/LiveRangeEdit.cpp
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
llvm/lib/Target/AMDGPU/SIInstrInfo.h
llvm/test/CodeGen/AMDGPU/remat-sop.mir
llvm/test/CodeGen/AMDGPU/remat-vop.mir
Removed:
################################################################################
diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
index 21758e0cfc2a..5c45cea5ccfe 100644
--- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
@@ -129,6 +129,12 @@ class TargetInstrInfo : public MCInstrInfo {
isReallyTriviallyReMaterializableGeneric(MI, AA)));
}
+ /// Given that \p MO is a PhysReg use, return true if it can be ignored for the
+ /// purpose of instruction rematerialization.
+ virtual bool isIgnorableUse(const MachineOperand &MO) const {
+ return false;
+ }
+
protected:
/// For instructions with opcodes for which the M_REMATERIALIZABLE flag is
/// set, this hook lets the target specify whether the instruction is actually
diff --git a/llvm/lib/CodeGen/LiveRangeEdit.cpp b/llvm/lib/CodeGen/LiveRangeEdit.cpp
index c5d1734b0f1f..64a2dd275643 100644
--- a/llvm/lib/CodeGen/LiveRangeEdit.cpp
+++ b/llvm/lib/CodeGen/LiveRangeEdit.cpp
@@ -113,9 +113,10 @@ bool LiveRangeEdit::allUsesAvailableAt(const MachineInstr *OrigMI,
if (!MO.isReg() || !MO.getReg() || !MO.readsReg())
continue;
- // We can't remat physreg uses, unless it is a constant.
+ // We can't remat physreg uses, unless it is a constant or target wants
+ // to ignore this use.
if (Register::isPhysicalRegister(MO.getReg())) {
- if (MRI.isConstantPhysReg(MO.getReg()))
+ if (MRI.isConstantPhysReg(MO.getReg()) || TII.isIgnorableUse(MO))
continue;
return false;
}
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index d0f5b2d66189..af276c606dcb 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -122,6 +122,12 @@ bool SIInstrInfo::isReallyTriviallyReMaterializable(const MachineInstr &MI,
return false;
}
+bool SIInstrInfo::isIgnorableUse(const MachineOperand &MO) const {
+ // Any implicit use of exec by VALU is not a real register read.
+ return MO.getReg() == AMDGPU::EXEC && MO.isImplicit() &&
+ isVALU(*MO.getParent());
+}
+
bool SIInstrInfo::areLoadsFromSameBasePtr(SDNode *Load0, SDNode *Load1,
int64_t &Offset0,
int64_t &Offset1) const {
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index 768cfd888bf0..e55774b94b0b 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -181,6 +181,8 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
bool isReallyTriviallyReMaterializable(const MachineInstr &MI,
AAResults *AA) const override;
+ bool isIgnorableUse(const MachineOperand &MO) const override;
+
bool areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
int64_t &Offset1,
int64_t &Offset2) const override;
diff --git a/llvm/test/CodeGen/AMDGPU/remat-sop.mir b/llvm/test/CodeGen/AMDGPU/remat-sop.mir
index 051f5ad094c2..ed799bfca028 100644
--- a/llvm/test/CodeGen/AMDGPU/remat-sop.mir
+++ b/llvm/test/CodeGen/AMDGPU/remat-sop.mir
@@ -23,6 +23,35 @@ body: |
S_ENDPGM 0
...
---
+name: test_no_remat_s_mov_b32_impuse_exec
+tracksRegLiveness: true
+machineFunctionInfo:
+ stackPtrOffsetReg: $sgpr32
+body: |
+ bb.0:
+ ; GCN-LABEL: name: test_no_remat_s_mov_b32_impuse_exec
+ ; GCN: $exec = IMPLICIT_DEF
+ ; GCN: renamable $sgpr0 = S_MOV_B32 1, implicit $exec
+ ; GCN: SI_SPILL_S32_SAVE killed renamable $sgpr0, %stack.1, implicit $exec, implicit $sgpr32 :: (store (s32) into %stack.1, addrspace 5)
+ ; GCN: renamable $sgpr1 = S_MOV_B32 2, implicit $exec
+ ; GCN: renamable $sgpr0 = S_MOV_B32 3, implicit $exec
+ ; GCN: SI_SPILL_S32_SAVE killed renamable $sgpr0, %stack.0, implicit $exec, implicit $sgpr32 :: (store (s32) into %stack.0, addrspace 5)
+ ; GCN: renamable $sgpr0 = SI_SPILL_S32_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.1, addrspace 5)
+ ; GCN: S_NOP 0, implicit killed renamable $sgpr0
+ ; GCN: S_NOP 0, implicit killed renamable $sgpr1
+ ; GCN: renamable $sgpr0 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s32) from %stack.0, addrspace 5)
+ ; GCN: S_NOP 0, implicit killed renamable $sgpr0
+ ; GCN: S_ENDPGM 0
+ $exec = IMPLICIT_DEF
+ %0:sreg_32 = S_MOV_B32 1, implicit $exec
+ %1:sreg_32 = S_MOV_B32 2, implicit $exec
+ %2:sreg_32 = S_MOV_B32 3, implicit $exec
+ S_NOP 0, implicit %0
+ S_NOP 0, implicit %1
+ S_NOP 0, implicit %2
+ S_ENDPGM 0
+...
+---
name: test_remat_s_mov_b64
tracksRegLiveness: true
body: |
diff --git a/llvm/test/CodeGen/AMDGPU/remat-vop.mir b/llvm/test/CodeGen/AMDGPU/remat-vop.mir
index 1de8efebbe4d..2be27057ebe7 100644
--- a/llvm/test/CodeGen/AMDGPU/remat-vop.mir
+++ b/llvm/test/CodeGen/AMDGPU/remat-vop.mir
@@ -52,6 +52,31 @@ body: |
S_ENDPGM 0
...
---
+name: test_remat_v_mov_b32_e32_exec_def
+tracksRegLiveness: true
+machineFunctionInfo:
+ stackPtrOffsetReg: $sgpr32
+body: |
+ bb.0:
+ ; GCN-LABEL: name: test_remat_v_mov_b32_e32_exec_def
+ ; GCN: renamable $vgpr0 = V_MOV_B32_e32 1, implicit $exec
+ ; GCN: renamable $vgpr1 = V_MOV_B32_e32 2, implicit $exec
+ ; GCN: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: S_NOP 0, implicit killed renamable $vgpr1
+ ; GCN: renamable $vgpr0 = V_MOV_B32_e32 3, implicit $exec
+ ; GCN: S_NOP 0, implicit killed renamable $vgpr0
+ ; GCN: $exec = S_ANDN2_B64_term $exec, undef renamable $sgpr0_sgpr1, implicit-def $scc
+ ; GCN: S_ENDPGM 0
+ %0:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
+ %1:vgpr_32 = V_MOV_B32_e32 2, implicit $exec
+ %2:vgpr_32 = V_MOV_B32_e32 3, implicit $exec
+ S_NOP 0, implicit %0
+ S_NOP 0, implicit %1
+ S_NOP 0, implicit %2
+ $exec = S_ANDN2_B64_term $exec, undef %4:sreg_64, implicit-def $scc
+ S_ENDPGM 0
+...
+---
name: test_remat_v_mov_b32_e64
tracksRegLiveness: true
body: |
More information about the llvm-commits
mailing list