[llvm] 8a12f20 - [AMDGPU] Update the mechanism used to check for cycles and add edges in power-sched mutation

via llvm-commits llvm-commits at lists.llvm.org
Thu Jul 14 16:24:40 PDT 2022


Author: jeff
Date: 2022-07-14T16:24:13-07:00
New Revision: 8a12f20ef7732ebad2a91417f577f1af0c470765

URL: https://github.com/llvm/llvm-project/commit/8a12f20ef7732ebad2a91417f577f1af0c470765
DIFF: https://github.com/llvm/llvm-project/commit/8a12f20ef7732ebad2a91417f577f1af0c470765.diff

LOG: [AMDGPU] Update the mechanism used to check for cycles and add edges in power-sched mutation

Added: 
    llvm/test/CodeGen/AMDGPU/power-sched-no-cycle.mir

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index 6bd906439ee8..e5d2e16f3238 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -852,34 +852,6 @@ struct FillMFMAShadowMutation : ScheduleDAGMutation {
     return MI && TII->isVALU(*MI);
   }
 
-  bool canAddEdge(const SUnit *Succ, const SUnit *Pred) const {
-    if (Pred->NodeNum < Succ->NodeNum)
-      return true;
-
-    SmallVector<const SUnit*, 64> Succs({Succ}), Preds({Pred});
-
-    for (unsigned I = 0; I < Succs.size(); ++I) {
-      for (const SDep &SI : Succs[I]->Succs) {
-        const SUnit *SU = SI.getSUnit();
-        if (SU != Succs[I] && !llvm::is_contained(Succs, SU))
-          Succs.push_back(SU);
-      }
-    }
-
-    SmallPtrSet<const SUnit*, 32> Visited;
-    while (!Preds.empty()) {
-      const SUnit *SU = Preds.pop_back_val();
-      if (llvm::is_contained(Succs, SU))
-        return false;
-      Visited.insert(SU);
-      for (const SDep &SI : SU->Preds)
-        if (SI.getSUnit() != SU && !Visited.count(SI.getSUnit()))
-          Preds.push_back(SI.getSUnit());
-    }
-
-    return true;
-  }
-
   // Link as many SALU instructions in chain as possible. Return the size
   // of the chain. Links up to MaxChain instructions.
   unsigned linkSALUChain(SUnit *From, SUnit *To, unsigned MaxChain,
@@ -895,18 +867,20 @@ struct FillMFMAShadowMutation : ScheduleDAGMutation {
       LLVM_DEBUG(dbgs() << "Inserting edge from\n" ; DAG->dumpNode(*From);
                  dbgs() << "to\n"; DAG->dumpNode(*SU); dbgs() << '\n');
 
-      if (SU->addPred(SDep(From, SDep::Artificial), false))
-        ++Linked;
+      if (SU != From && From != &DAG->ExitSU && DAG->canAddEdge(SU, From))
+        if (DAG->addEdge(SU, SDep(From, SDep::Artificial)))
+          ++Linked;
 
       for (SDep &SI : From->Succs) {
         SUnit *SUv = SI.getSUnit();
-        if (SUv != From && isVALU(SUv) && canAddEdge(SUv, SU))
-          SUv->addPred(SDep(SU, SDep::Artificial), false);
+        if (SUv != From && SU != &DAG->ExitSU && isVALU(SUv) &&
+            DAG->canAddEdge(SUv, SU))
+          DAG->addEdge(SUv, SDep(SU, SDep::Artificial));
       }
 
       for (SDep &SI : SU->Succs) {
         SUnit *Succ = SI.getSUnit();
-        if (Succ != SU && isSALU(Succ) && canAddEdge(From, Succ))
+        if (Succ != SU && isSALU(Succ))
           Worklist.push_back(Succ);
       }
     }
@@ -949,7 +923,8 @@ struct FillMFMAShadowMutation : ScheduleDAGMutation {
         if (Visited.count(&*LastSALU))
           continue;
 
-        if (!isSALU(&*LastSALU) || !canAddEdge(&*LastSALU, &SU))
+        if (&SU == &DAG->ExitSU || &SU == &*LastSALU || !isSALU(&*LastSALU) ||
+            !DAG->canAddEdge(&*LastSALU, &SU))
           continue;
 
         Lat -= linkSALUChain(&SU, &*LastSALU, Lat, Visited);

diff  --git a/llvm/test/CodeGen/AMDGPU/power-sched-no-cycle.mir b/llvm/test/CodeGen/AMDGPU/power-sched-no-cycle.mir
new file mode 100644
index 000000000000..f6357154cbeb
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/power-sched-no-cycle.mir
@@ -0,0 +1,26 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -march=amdgcn -mcpu=gfx90a -run-pass=postmisched %s -o - -amdgpu-enable-power-sched=true  2>&1 | FileCheck %s
+# This test represents a pattern which caused power-sched to introduce cycles into the scheduling graph. The fact that this test does not crash indicates that it has completed successfully.
+
+---
+name:            power_sched_cycle_condition
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr1, $sgpr2_sgpr3, $sgpr1, $vgpr72_vgpr73, $vgpr2_vgpr3, $sgpr22, $sgpr6_sgpr7, $sgpr10_sgpr11
+    ; CHECK-LABEL: name: power_sched_cycle_condition
+    ; CHECK: liveins: $vgpr1, $sgpr2_sgpr3, $sgpr1, $vgpr72_vgpr73, $vgpr2_vgpr3, $sgpr22, $sgpr6_sgpr7, $sgpr10_sgpr11
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: $sgpr1 = S_LOAD_DWORD_IMM $sgpr2_sgpr3, 56, 0 :: (dereferenceable invariant load (s64), addrspace 4)
+    ; CHECK-NEXT: $sgpr4 = S_LSHL_B32 killed $sgpr22, 1, implicit-def dead $scc
+    ; CHECK-NEXT: $sgpr22_sgpr23 = S_LOAD_DWORDX2_IMM killed $sgpr2_sgpr3, 36, 0 :: (dereferenceable invariant load (s64), addrspace 4)
+    ; CHECK-NEXT: $vgpr2 = nsw V_MUL_LO_U32_e64 killed $vgpr1, killed $sgpr1, implicit $exec
+    ; CHECK-NEXT: early-clobber $vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21 = V_MFMA_F32_32X32X8F16_vgprcd_e64 killed $vgpr2_vgpr3, $vgpr72_vgpr73, 0, 0, 0, 0, implicit $mode, implicit $exec
+    $sgpr1 = S_LOAD_DWORD_IMM $sgpr2_sgpr3, 56, 0 :: (dereferenceable invariant load (s64), addrspace 4)
+    $vgpr2 = nsw V_MUL_LO_U32_e64 $vgpr1, $sgpr1, implicit $exec
+    $vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21 =  V_MFMA_F32_32X32X8F16_vgprcd_e64 $vgpr2_vgpr3, $vgpr72_vgpr73, 0, 0, 0, 0, implicit $mode, implicit $exec
+    $sgpr4 = S_LSHL_B32 $sgpr22, 1, implicit-def dead $scc
+    $sgpr22_sgpr23 = S_LOAD_DWORDX2_IMM $sgpr2_sgpr3, 36, 0 :: (dereferenceable invariant load (s64), addrspace 4)
+...
+
+


        


More information about the llvm-commits mailing list