[llvm] ad74185 - [AMDGPU] Model distance to instruction in bundle
Stanislav Mekhanoshin via llvm-commits
llvm-commits at lists.llvm.org
Tue Jan 14 01:19:10 PST 2020
Author: Stanislav Mekhanoshin
Date: 2020-01-14T01:18:59-08:00
New Revision: ad741853c38880dff99cd5b5035b8965c5a73011
URL: https://github.com/llvm/llvm-project/commit/ad741853c38880dff99cd5b5035b8965c5a73011
DIFF: https://github.com/llvm/llvm-project/commit/ad741853c38880dff99cd5b5035b8965c5a73011.diff
LOG: [AMDGPU] Model distance to instruction in bundle
This change makes it possible to model the height of an instruction
within a bundle for latency adjustment purposes.
Differential Revision: https://reviews.llvm.org/D72669
Added:
llvm/test/CodeGen/AMDGPU/bundle-latency.mir
Modified:
llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index af1dc8d7a480..445e91092499 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -730,14 +730,26 @@ void GCNSubtarget::adjustSchedDependency(SUnit *Src, SUnit *Dst,
auto Reg = Dep.getReg();
MachineBasicBlock::const_instr_iterator I(SrcI->getIterator());
MachineBasicBlock::const_instr_iterator E(SrcI->getParent()->instr_end());
+ unsigned Lat = 0;
for (++I; I != E && I->isBundledWithPred(); ++I) {
- if (!I->modifiesRegister(Reg, TRI))
- continue;
- Dep.setLatency(InstrInfo.getInstrLatency(getInstrItineraryData(), *I));
- break;
+ if (I->modifiesRegister(Reg, TRI))
+ Lat = InstrInfo.getInstrLatency(getInstrItineraryData(), *I);
+ else if (Lat)
+ --Lat;
}
+ Dep.setLatency(Lat);
} else if (DstI->isBundle()) {
- Dep.setLatency(InstrInfo.getInstrLatency(getInstrItineraryData(), *SrcI));
+ const SIRegisterInfo *TRI = getRegisterInfo();
+ auto Reg = Dep.getReg();
+ MachineBasicBlock::const_instr_iterator I(DstI->getIterator());
+ MachineBasicBlock::const_instr_iterator E(DstI->getParent()->instr_end());
+ unsigned Lat = InstrInfo.getInstrLatency(getInstrItineraryData(), *SrcI);
+ for (++I; I != E && I->isBundledWithPred() && Lat; ++I) {
+ if (I->readsRegister(Reg, TRI))
+ break;
+ --Lat;
+ }
+ Dep.setLatency(Lat);
}
}
diff --git a/llvm/test/CodeGen/AMDGPU/bundle-latency.mir b/llvm/test/CodeGen/AMDGPU/bundle-latency.mir
new file mode 100644
index 000000000000..603d0cf33f90
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/bundle-latency.mir
@@ -0,0 +1,44 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass=post-RA-sched %s -o - | FileCheck -check-prefix=GCN %s
+
+# Check that we move consumer further from producer, even if one of them is in a bundle.
+
+---
+name: src_bundle_latency
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; GCN-LABEL: name: src_bundle_latency
+ ; GCN: $vgpr0, $vgpr1 = BUNDLE undef $vgpr3_vgpr4, implicit $exec {
+ ; GCN: $vgpr0 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 0, 0, 0, 0, implicit $exec
+ ; GCN: $vgpr1 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 4, 0, 0, 0, implicit $exec
+ ; GCN: }
+ ; GCN: $vgpr6 = V_ADD_F32_e32 killed $vgpr0, $vgpr0, implicit $exec
+ ; GCN: $vgpr5 = V_ADD_F32_e32 killed $vgpr1, $vgpr1, implicit $exec
+ $vgpr0, $vgpr1 = BUNDLE undef $vgpr3_vgpr4, implicit $exec {
+ $vgpr0 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 0, 0, 0, 0, implicit $exec
+ $vgpr1 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 4, 0, 0, 0, implicit $exec
+ }
+ $vgpr5 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $exec
+ $vgpr6 = V_ADD_F32_e32 $vgpr0, $vgpr0, implicit $exec
+...
+
+---
+name: dst_bundle_latency
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; GCN-LABEL: name: dst_bundle_latency
+ ; GCN: $vgpr1 = V_ADD_F32_e32 undef $vgpr6, undef $vgpr6, implicit $exec
+ ; GCN: $vgpr0 = V_ADD_F32_e32 undef $vgpr5, undef $vgpr5, implicit $exec
+ ; GCN: BUNDLE killed $vgpr0, killed $vgpr1, undef $vgpr3_vgpr4, implicit $exec {
+ ; GCN: GLOBAL_STORE_DWORD undef $vgpr3_vgpr4, killed $vgpr1, 0, 0, 0, 0, implicit $exec
+ ; GCN: GLOBAL_STORE_DWORD undef $vgpr3_vgpr4, killed $vgpr0, 4, 0, 0, 0, implicit $exec
+ ; GCN: }
+ $vgpr0 = V_ADD_F32_e32 undef $vgpr5, undef $vgpr5, implicit $exec
+ $vgpr1 = V_ADD_F32_e32 undef $vgpr6, undef $vgpr6, implicit $exec
+ BUNDLE $vgpr0, $vgpr1, undef $vgpr3_vgpr4, implicit $exec {
+ GLOBAL_STORE_DWORD undef $vgpr3_vgpr4, $vgpr1, 0, 0, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORD undef $vgpr3_vgpr4, $vgpr0, 4, 0, 0, 0, implicit $exec
+ }
+...
More information about the llvm-commits mailing list