[llvm] r371671 - AMDGPU: Move m0 initializations earlier
Austin Kerbow via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 11 14:28:42 PDT 2019
Author: kerbowa
Date: Wed Sep 11 14:28:41 2019
New Revision: 371671
URL: http://llvm.org/viewvc/llvm-project?rev=371671&view=rev
Log:
AMDGPU: Move m0 initializations earlier
Summary:
After hoisting and merging m0 initializations, schedule them as early as
possible in the MBB. This helps the scheduler avoid hazards in some
cases.
Reviewers: rampitec, arsenm
Subscribers: kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, arphaman, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D67450
Modified:
llvm/trunk/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
llvm/trunk/test/CodeGen/AMDGPU/frame-index-elimination.ll
llvm/trunk/test/CodeGen/AMDGPU/merge-m0.mir
Modified: llvm/trunk/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIFixSGPRCopies.cpp?rev=371671&r1=371670&r2=371671&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIFixSGPRCopies.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIFixSGPRCopies.cpp Wed Sep 11 14:28:41 2019
@@ -466,6 +466,7 @@ getFirstNonPrologue(MachineBasicBlock *M
// executioon.
static bool hoistAndMergeSGPRInits(unsigned Reg,
const MachineRegisterInfo &MRI,
+ const TargetRegisterInfo *TRI,
MachineDominatorTree &MDT,
const TargetInstrInfo *TII) {
// List of inits by immediate value.
@@ -480,7 +481,7 @@ static bool hoistAndMergeSGPRInits(unsig
for (auto &MI : MRI.def_instructions(Reg)) {
MachineOperand *Imm = nullptr;
- for (auto &MO: MI.operands()) {
+ for (auto &MO : MI.operands()) {
if ((MO.isReg() && ((MO.isDef() && MO.getReg() != Reg) || !MO.isDef())) ||
(!MO.isImm() && !MO.isReg()) || (MO.isImm() && Imm)) {
Imm = nullptr;
@@ -585,8 +586,41 @@ static bool hoistAndMergeSGPRInits(unsig
}
}
- for (auto MI : MergedInstrs)
- MI->removeFromParent();
+ // Remove initializations that were merged into another.
+ for (auto &Init : Inits) {
+ auto &Defs = Init.second;
+ for (auto I = Defs.begin(); I != Defs.end(); ++I)
+ if (MergedInstrs.count(*I)) {
+ (*I)->eraseFromParent();
+ I = Defs.erase(I);
+ }
+ }
+
+ // Try to schedule SGPR initializations as early as possible in the MBB.
+ for (auto &Init : Inits) {
+ auto &Defs = Init.second;
+ for (auto MI : Defs) {
+ auto MBB = MI->getParent();
+ MachineInstr &BoundaryMI = *getFirstNonPrologue(MBB, TII);
+ MachineBasicBlock::reverse_iterator B(BoundaryMI);
+ // Check if B should actually be a boundary. If not set the previous
+ // instruction as the boundary instead.
+ if (!TII->isBasicBlockPrologue(*B))
+ B++;
+
+ auto R = std::next(MI->getReverseIterator());
+ const unsigned Threshold = 50;
+ // Search until B or Threshold for a place to insert the initialization.
+ for (unsigned I = 0; R != B && I < Threshold; ++R, ++I)
+ if (R->readsRegister(Reg, TRI) || R->definesRegister(Reg, TRI) ||
+ TII->isSchedulingBoundary(*R, MBB, *MBB->getParent()))
+ break;
+
+ // Move to directly after R.
+ if (&*--R != MI)
+ MBB->splice(*R, MBB, MI);
+ }
+ }
if (Changed)
MRI.clearKillFlags(Reg);
@@ -755,7 +789,7 @@ bool SIFixSGPRCopies::runOnMachineFuncti
}
if (MF.getTarget().getOptLevel() > CodeGenOpt::None && EnableM0Merge)
- hoistAndMergeSGPRInits(AMDGPU::M0, MRI, *MDT, TII);
+ hoistAndMergeSGPRInits(AMDGPU::M0, MRI, TRI, *MDT, TII);
return true;
}
Modified: llvm/trunk/test/CodeGen/AMDGPU/frame-index-elimination.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/frame-index-elimination.ll?rev=371671&r1=371670&r2=371671&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/frame-index-elimination.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/frame-index-elimination.ll Wed Sep 11 14:28:41 2019
@@ -26,12 +26,12 @@ define void @func_mov_fi_i32() #0 {
; CI: s_sub_u32 [[SUB0:s[0-9]+|vcc_lo|vcc_hi]], s32, s33
; CI-NEXT: s_sub_u32 [[SUB1:s[0-9]+|vcc_lo|vcc_hi]], s32, s33
-; CI-NEXT: v_lshr_b32_e64 [[SCALED:v[0-9]+]], [[SUB1]], 6
-; CI-NEXT: v_lshr_b32_e64 v0, [[SUB0]], 6
-; CI-NEXT: v_add_i32_e64 v1, s{{\[[0-9]+:[0-9]+\]}}, 4, [[SCALED]]
+; CI-DAG: v_lshr_b32_e64 v0, [[SUB0]], 6
+; CI-DAG: v_lshr_b32_e64 [[SCALED:v[0-9]+]], [[SUB1]], 6
; CI-NOT: v_mov
; CI: ds_write_b32 v0, v0
-; CI-NEXT: ds_write_b32 v0, v1
+; CI-NEXT: v_add_i32_e64 v0, s{{\[[0-9]+:[0-9]+\]}}, 4, [[SCALED]]
+; CI-NEXT: ds_write_b32 v0, v0
; GFX9: s_sub_u32 [[SUB0:s[0-9]+|vcc_lo|vcc_hi]], s32, s33
; GFX9-NEXT: s_sub_u32 [[SUB1:s[0-9]+|vcc_lo|vcc_hi]], s32, s33
Modified: llvm/trunk/test/CodeGen/AMDGPU/merge-m0.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/merge-m0.mir?rev=371671&r1=371670&r2=371671&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/merge-m0.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/merge-m0.mir Wed Sep 11 14:28:41 2019
@@ -1,7 +1,10 @@
# RUN: llc -march=amdgcn -amdgpu-enable-merge-m0 -verify-machineinstrs -run-pass si-fix-sgpr-copies %s -o - | FileCheck -check-prefix=GCN %s
+# GCN-LABEL: name: merge-m0-many-init
# GCN: bb.0.entry:
# GCN: SI_INIT_M0 -1
+# GCN-NEXT: IMPLICIT_DEF
+# GCN-NEXT: IMPLICIT_DEF
# GCN-NEXT: DS_WRITE_B32
# GCN-NEXT: DS_WRITE_B32
# GCN-NEXT: SI_INIT_M0 65536
@@ -45,9 +48,8 @@
# GCN-NEXT: DS_WRITE_B32
# GCN-NEXT: SI_INIT_M0 -1
# GCN-NEXT: DS_WRITE_B32
-
---
-name: merge-m0-many-init
+name: merge-m0-many-init
registers:
- { id: 0, class: vgpr_32 }
- { id: 1, class: vgpr_32 }
@@ -124,22 +126,24 @@ body: |
...
+# GCN-LABEL: name: merge-m0-dont-hoist-past-init-with-different-initializer
# GCN: bb.0.entry:
# GCN: SI_INIT_M0 65536
+# GCN-NEXT: IMPLICIT_DEF
+# GCN-NEXT: IMPLICIT_DEF
# GCN-NEXT: DS_WRITE_B32
-#GCN: bb.1:
-#GCN-NOT: SI_INIT_M0 65536
-#GCN-NOT: SI_INIT_M0 -1
-
-#GCN: bb.2:
-#GCN: SI_INIT_M0 -1
+# GCN: bb.1:
+# GCN-NOT: SI_INIT_M0 65536
+# GCN-NOT: SI_INIT_M0 -1
-#GCN: bb.3:
-#GCN: SI_INIT_M0 -1
+# GCN: bb.2:
+# GCN: SI_INIT_M0 -1
+# GCN: bb.3:
+# GCN: SI_INIT_M0 -1
---
-name: merge-m0-dont-hoist-past-init-with-different-initializer
+name: merge-m0-dont-hoist-past-init-with-different-initializer
registers:
- { id: 0, class: vgpr_32 }
- { id: 1, class: vgpr_32 }
@@ -179,19 +183,19 @@ body: |
S_ENDPGM 0
...
+# GCN-LABEL: name: merge-m0-after-prologue
# GCN: bb.0.entry:
# GCN-NOT: SI_INIT_M0
# GCN: S_OR_B64
# GCN-NEXT: SI_INIT_M0
-#GCN: bb.1:
-#GCN-NOT: SI_INIT_M0 -1
-
-#GCN: bb.2:
-#GCN-NOT: SI_INIT_MO -1
+# GCN: bb.1:
+# GCN-NOT: SI_INIT_M0 -1
+# GCN: bb.2:
+# GCN-NOT: SI_INIT_MO -1
---
-name: merge-m0-after-prologue
+name: merge-m0-after-prologue
registers:
- { id: 0, class: vgpr_32 }
- { id: 1, class: vgpr_32 }
@@ -223,3 +227,71 @@ body: |
bb.3:
S_ENDPGM 0
...
+
+# GCN-LABEL: name: move-m0-avoid-hazard
+# GCN: $m0 = S_MOV_B32 -1
+# GCN-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+# GCN-NEXT: DS_GWS_INIT $vgpr0, 0, 1, implicit $m0, implicit $exec
+---
+name: move-m0-avoid-hazard
+body: |
+ bb.0:
+ $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ $m0 = S_MOV_B32 -1
+ DS_GWS_INIT $vgpr0, 0, 1, implicit $m0, implicit $exec
+...
+
+# GCN-LABEL: name: move-m0-with-prologue
+# GCN $exec = S_OR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc
+# GCN: $m0 = S_MOV_B32 -1
+# GCN-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+# GCN-NEXT: DS_GWS_INIT $vgpr0, 0, 1, implicit $m0, implicit $exec
+---
+name: move-m0-with-prologue
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1
+
+ $exec = S_OR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc
+ $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ $m0 = S_MOV_B32 -1
+ DS_GWS_INIT $vgpr0, 0, 1, implicit $m0, implicit $exec
+...
+
+# GCN-LABEL: name: move-m0-different-initializer
+# GCN: SI_INIT_M0 -1
+# GCN-NEXT: %0:vgpr_32 = IMPLICIT_DEF
+# GCN: SI_INIT_M0 65536
+# GCN-NEXT: S_NOP
+---
+name: move-m0-different-initializer
+registers:
+ - { id: 0, class: vgpr_32 }
+ - { id: 1, class: vgpr_32 }
+body: |
+ bb.0:
+ %0 = IMPLICIT_DEF
+ %1 = IMPLICIT_DEF
+ SI_INIT_M0 -1, implicit-def $m0
+ DS_WRITE_B32 %0, %1, 0, 0, implicit $m0, implicit $exec
+ S_NOP 0
+ SI_INIT_M0 65536, implicit-def $m0
+ DS_WRITE_B32 %0, %1, 0, 0, implicit $m0, implicit $exec
+...
+
+# GCN-LABEL: name: move-m0-schedule-boundary
+# GCN: S_SETREG
+# GCN-NEXT: SI_INIT_M0 -1
+---
+name: move-m0-schedule-boundary
+registers:
+ - { id: 0, class: vgpr_32 }
+ - { id: 1, class: vgpr_32 }
+body: |
+ bb.0:
+ %0 = IMPLICIT_DEF
+ %1 = IMPLICIT_DEF
+ S_SETREG_IMM32_B32 0, 1
+ SI_INIT_M0 -1, implicit-def $m0
+ DS_WRITE_B32 %0, %1, 0, 0, implicit $m0, implicit $exec
+...
More information about the llvm-commits
mailing list