[PATCH] D72669: [AMDGPU] Model distance to instruction in bundle
Stanislav Mekhanoshin via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Mon Jan 13 17:09:16 PST 2020
rampitec created this revision.
rampitec added a reviewer: foad.
Herald added subscribers: hiraditya, t-tye, tpr, dstuttard, yaxunl, nhaehnle, wdng, jvesely, kzhuravl, arsenm.
Herald added a project: LLVM.
rampitec added a parent revision: D72655: [AMDGPU] Fix getInstrLatency() always returning 1.
This change makes it possible to model the height of an instruction
within a bundle for latency adjustment purposes.
https://reviews.llvm.org/D72669
Files:
llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
llvm/test/CodeGen/AMDGPU/bundle-latency.mir
Index: llvm/test/CodeGen/AMDGPU/bundle-latency.mir
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AMDGPU/bundle-latency.mir
@@ -0,0 +1,44 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass=post-RA-sched %s -o - | FileCheck -check-prefix=GCN %s
+
+# Check that we move consumer further from producer, even if one of them is in a bundle.
+
+---
+name: src_bundle_latency
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; GCN-LABEL: name: src_bundle_latency
+ ; GCN: $vgpr0, $vgpr1 = BUNDLE undef $vgpr3_vgpr4, implicit $exec {
+ ; GCN: $vgpr0 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 0, 0, 0, 0, implicit $exec
+ ; GCN: $vgpr1 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 4, 0, 0, 0, implicit $exec
+ ; GCN: }
+ ; GCN: $vgpr6 = V_ADD_F32_e32 killed $vgpr0, $vgpr0, implicit $exec
+ ; GCN: $vgpr5 = V_ADD_F32_e32 killed $vgpr1, $vgpr1, implicit $exec
+ $vgpr0, $vgpr1 = BUNDLE undef $vgpr3_vgpr4, implicit $exec {
+ $vgpr0 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 0, 0, 0, 0, implicit $exec
+ $vgpr1 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 4, 0, 0, 0, implicit $exec
+ }
+ $vgpr5 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $exec
+ $vgpr6 = V_ADD_F32_e32 $vgpr0, $vgpr0, implicit $exec
+...
+
+---
+name: dst_bundle_latency
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; GCN-LABEL: name: dst_bundle_latency
+ ; GCN: $vgpr1 = V_ADD_F32_e32 undef $vgpr6, undef $vgpr6, implicit $exec
+ ; GCN: $vgpr0 = V_ADD_F32_e32 undef $vgpr5, undef $vgpr5, implicit $exec
+ ; GCN: BUNDLE killed $vgpr0, killed $vgpr1, undef $vgpr3_vgpr4, implicit $exec {
+ ; GCN: GLOBAL_STORE_DWORD undef $vgpr3_vgpr4, killed $vgpr1, 0, 0, 0, 0, implicit $exec
+ ; GCN: GLOBAL_STORE_DWORD undef $vgpr3_vgpr4, killed $vgpr0, 4, 0, 0, 0, implicit $exec
+ ; GCN: }
+ $vgpr0 = V_ADD_F32_e32 undef $vgpr5, undef $vgpr5, implicit $exec
+ $vgpr1 = V_ADD_F32_e32 undef $vgpr6, undef $vgpr6, implicit $exec
+ BUNDLE $vgpr0, $vgpr1, undef $vgpr3_vgpr4, implicit $exec {
+ GLOBAL_STORE_DWORD undef $vgpr3_vgpr4, $vgpr1, 0, 0, 0, 0, implicit $exec
+ GLOBAL_STORE_DWORD undef $vgpr3_vgpr4, $vgpr0, 4, 0, 0, 0, implicit $exec
+ }
+...
Index: llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -730,14 +730,26 @@
auto Reg = Dep.getReg();
MachineBasicBlock::const_instr_iterator I(SrcI->getIterator());
MachineBasicBlock::const_instr_iterator E(SrcI->getParent()->instr_end());
+ unsigned Lat = 0;
for (++I; I != E && I->isBundledWithPred(); ++I) {
- if (!I->modifiesRegister(Reg, TRI))
- continue;
- Dep.setLatency(InstrInfo.getInstrLatency(getInstrItineraryData(), *I));
- break;
+ if (I->modifiesRegister(Reg, TRI))
+ Lat = InstrInfo.getInstrLatency(getInstrItineraryData(), *I);
+ else if (Lat)
+ --Lat;
}
+ Dep.setLatency(Lat);
} else if (DstI->isBundle()) {
- Dep.setLatency(InstrInfo.getInstrLatency(getInstrItineraryData(), *SrcI));
+ const SIRegisterInfo *TRI = getRegisterInfo();
+ auto Reg = Dep.getReg();
+ MachineBasicBlock::const_instr_iterator I(DstI->getIterator());
+ MachineBasicBlock::const_instr_iterator E(DstI->getParent()->instr_end());
+ unsigned Lat = InstrInfo.getInstrLatency(getInstrItineraryData(), *SrcI);
+ for (++I; I != E && I->isBundledWithPred() && Lat; ++I) {
+ if (I->readsRegister(Reg, TRI))
+ break;
+ --Lat;
+ }
+ Dep.setLatency(Lat);
}
}
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D72669.237817.patch
Type: text/x-patch
Size: 3802 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20200114/5f7414bf/attachment.bin>
More information about the llvm-commits
mailing list