[PATCH] D72669: [AMDGPU] Model distance to instruction in bundle

Stanislav Mekhanoshin via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Tue Jan 14 01:23:37 PST 2020


This revision was automatically updated to reflect the committed changes.
Closed by commit rGad741853c388: [AMDGPU] Model distance to instruction in bundle (authored by rampitec).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D72669/new/

https://reviews.llvm.org/D72669

Files:
  llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
  llvm/test/CodeGen/AMDGPU/bundle-latency.mir


Index: llvm/test/CodeGen/AMDGPU/bundle-latency.mir
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AMDGPU/bundle-latency.mir
@@ -0,0 +1,44 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass=post-RA-sched %s -o - | FileCheck -check-prefix=GCN %s
+
+# Check that we move consumer further from producer, even if one of them is in a bundle.
+
+---
+name:            src_bundle_latency
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    ; GCN-LABEL: name: src_bundle_latency
+    ; GCN: $vgpr0, $vgpr1 = BUNDLE undef $vgpr3_vgpr4, implicit $exec {
+    ; GCN:   $vgpr0 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 0, 0, 0, 0, implicit $exec
+    ; GCN:   $vgpr1 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 4, 0, 0, 0, implicit $exec
+    ; GCN: }
+    ; GCN: $vgpr6 = V_ADD_F32_e32 killed $vgpr0, $vgpr0, implicit $exec
+    ; GCN: $vgpr5 = V_ADD_F32_e32 killed $vgpr1, $vgpr1, implicit $exec
+    $vgpr0, $vgpr1 = BUNDLE undef $vgpr3_vgpr4, implicit $exec {
+      $vgpr0 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 0, 0, 0, 0, implicit $exec
+      $vgpr1 = GLOBAL_LOAD_DWORD undef $vgpr3_vgpr4, 4, 0, 0, 0, implicit $exec
+    }
+    $vgpr5 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $exec
+    $vgpr6 = V_ADD_F32_e32 $vgpr0, $vgpr0, implicit $exec
+...
+
+---
+name:            dst_bundle_latency
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    ; GCN-LABEL: name: dst_bundle_latency
+    ; GCN: $vgpr1 = V_ADD_F32_e32 undef $vgpr6, undef $vgpr6, implicit $exec
+    ; GCN: $vgpr0 = V_ADD_F32_e32 undef $vgpr5, undef $vgpr5, implicit $exec
+    ; GCN: BUNDLE killed $vgpr0, killed $vgpr1, undef $vgpr3_vgpr4, implicit $exec {
+    ; GCN:   GLOBAL_STORE_DWORD undef $vgpr3_vgpr4, killed $vgpr1, 0, 0, 0, 0, implicit $exec
+    ; GCN:   GLOBAL_STORE_DWORD undef $vgpr3_vgpr4, killed $vgpr0, 4, 0, 0, 0, implicit $exec
+    ; GCN: }
+    $vgpr0 = V_ADD_F32_e32 undef $vgpr5, undef $vgpr5, implicit $exec
+    $vgpr1 = V_ADD_F32_e32 undef $vgpr6, undef $vgpr6, implicit $exec
+    BUNDLE $vgpr0, $vgpr1, undef $vgpr3_vgpr4, implicit $exec {
+      GLOBAL_STORE_DWORD undef $vgpr3_vgpr4, $vgpr1, 0, 0, 0, 0, implicit $exec
+      GLOBAL_STORE_DWORD undef $vgpr3_vgpr4, $vgpr0, 4, 0, 0, 0, implicit $exec
+    }
+...
Index: llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -730,14 +730,26 @@
     auto Reg = Dep.getReg();
     MachineBasicBlock::const_instr_iterator I(SrcI->getIterator());
     MachineBasicBlock::const_instr_iterator E(SrcI->getParent()->instr_end());
+    unsigned Lat = 0;
     for (++I; I != E && I->isBundledWithPred(); ++I) {
-      if (!I->modifiesRegister(Reg, TRI))
-        continue;
-      Dep.setLatency(InstrInfo.getInstrLatency(getInstrItineraryData(), *I));
-      break;
+      if (I->modifiesRegister(Reg, TRI))
+        Lat = InstrInfo.getInstrLatency(getInstrItineraryData(), *I);
+      else if (Lat)
+        --Lat;
     }
+    Dep.setLatency(Lat);
   } else if (DstI->isBundle()) {
-    Dep.setLatency(InstrInfo.getInstrLatency(getInstrItineraryData(), *SrcI));
+    const SIRegisterInfo *TRI = getRegisterInfo();
+    auto Reg = Dep.getReg();
+    MachineBasicBlock::const_instr_iterator I(DstI->getIterator());
+    MachineBasicBlock::const_instr_iterator E(DstI->getParent()->instr_end());
+    unsigned Lat = InstrInfo.getInstrLatency(getInstrItineraryData(), *SrcI);
+    for (++I; I != E && I->isBundledWithPred() && Lat; ++I) {
+      if (I->readsRegister(Reg, TRI))
+        break;
+      --Lat;
+    }
+    Dep.setLatency(Lat);
   }
 }
 


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D72669.237881.patch
Type: text/x-patch
Size: 3802 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20200114/58bae08e/attachment.bin>


More information about the llvm-commits mailing list