[llvm] [RISCV][PoC] Schedule RVV instructions with same type first (PR #95924)

Pengcheng Wang via llvm-commits llvm-commits at lists.llvm.org
Wed Jun 19 03:46:00 PDT 2024


https://github.com/wangpc-pp updated https://github.com/llvm/llvm-project/pull/95924

>From 5ac4ff3040f8a5a6cc68efffe3349ef9d181ddec Mon Sep 17 00:00:00 2001
From: Wang Pengcheng <wangpengcheng.pp at bytedance.com>
Date: Tue, 18 Jun 2024 21:33:25 +0800
Subject: [PATCH 1/2] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20in?=
 =?UTF-8?q?itial=20version?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Created using spr 1.3.6-beta.1
---
 llvm/include/llvm/CodeGen/MachineScheduler.h  | 43 ++++++++--
 llvm/lib/CodeGen/MachineScheduler.cpp         | 34 +-------
 llvm/lib/Target/RISCV/CMakeLists.txt          |  1 +
 .../Target/RISCV/RISCVMachineScheduler.cpp    | 83 +++++++++++++++++++
 llvm/lib/Target/RISCV/RISCVMachineScheduler.h | 42 ++++++++++
 llvm/lib/Target/RISCV/RISCVTargetMachine.cpp  |  8 +-
 llvm/test/CodeGen/RISCV/rvv/schedule.ll       | 49 +++++++++++
 7 files changed, 215 insertions(+), 45 deletions(-)
 create mode 100644 llvm/lib/Target/RISCV/RISCVMachineScheduler.cpp
 create mode 100644 llvm/lib/Target/RISCV/RISCVMachineScheduler.h
 create mode 100644 llvm/test/CodeGen/RISCV/rvv/schedule.ll

diff --git a/llvm/include/llvm/CodeGen/MachineScheduler.h b/llvm/include/llvm/CodeGen/MachineScheduler.h
index b15abf040058e..d1b5b83e5300b 100644
--- a/llvm/include/llvm/CodeGen/MachineScheduler.h
+++ b/llvm/include/llvm/CodeGen/MachineScheduler.h
@@ -1349,14 +1349,6 @@ class PostGenericScheduler : public GenericSchedulerBase {
   void pickNodeFromQueue(SchedBoundary &Zone, SchedCandidate &Cand);
 };
 
-/// Create the standard converging machine scheduler. This will be used as the
-/// default scheduler if the target does not set a default.
-/// Adds default DAG mutations.
-ScheduleDAGMILive *createGenericSchedLive(MachineSchedContext *C);
-
-/// Create a generic scheduler with no vreg liveness or DAG mutation passes.
-ScheduleDAGMI *createGenericSchedPostRA(MachineSchedContext *C);
-
 /// If ReorderWhileClustering is set to true, no attempt will be made to
 /// reduce reordering due to store clustering.
 std::unique_ptr<ScheduleDAGMutation>
@@ -1375,6 +1367,41 @@ std::unique_ptr<ScheduleDAGMutation>
 createCopyConstrainDAGMutation(const TargetInstrInfo *TII,
                                const TargetRegisterInfo *TRI);
 
+/// Create the standard converging machine scheduler. This will be used as the
+/// default scheduler if the target does not set a default.
+/// Adds default DAG mutations.
+template <typename Strategy = GenericScheduler>
+ScheduleDAGMILive *createGenericSchedLive(MachineSchedContext *C) {
+  ScheduleDAGMILive *DAG =
+      new ScheduleDAGMILive(C, std::make_unique<Strategy>(C));
+  // Register DAG post-processors.
+  //
+  // FIXME: extend the mutation API to allow earlier mutations to instantiate
+  // data and pass it to later mutations. Have a single mutation that gathers
+  // the interesting nodes in one pass.
+  DAG->addMutation(createCopyConstrainDAGMutation(DAG->TII, DAG->TRI));
+
+  const TargetSubtargetInfo &STI = C->MF->getSubtarget();
+  // Add MacroFusion mutation if fusions are not empty.
+  const auto &MacroFusions = STI.getMacroFusions();
+  if (!MacroFusions.empty())
+    DAG->addMutation(createMacroFusionDAGMutation(MacroFusions));
+  return DAG;
+}
+
+/// Create a generic scheduler with no vreg liveness or DAG mutation passes.
+template <typename Strategy = PostGenericScheduler>
+ScheduleDAGMI *createGenericSchedPostRA(MachineSchedContext *C) {
+  ScheduleDAGMI *DAG = new ScheduleDAGMI(C, std::make_unique<Strategy>(C),
+                                         /*RemoveKillFlags=*/true);
+  const TargetSubtargetInfo &STI = C->MF->getSubtarget();
+  // Add MacroFusion mutation if fusions are not empty.
+  const auto &MacroFusions = STI.getMacroFusions();
+  if (!MacroFusions.empty())
+    DAG->addMutation(createMacroFusionDAGMutation(MacroFusions));
+  return DAG;
+}
+
 } // end namespace llvm
 
 #endif // LLVM_CODEGEN_MACHINESCHEDULER_H
diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp
index cf72f74380835..ac792ad4d5484 100644
--- a/llvm/lib/CodeGen/MachineScheduler.cpp
+++ b/llvm/lib/CodeGen/MachineScheduler.cpp
@@ -2701,7 +2701,7 @@ void SchedBoundary::bumpNode(SUnit *SU) {
   unsigned NextCycle = CurrCycle;
   switch (SchedModel->getMicroOpBufferSize()) {
   case 0:
-    assert(ReadyCycle <= CurrCycle && "Broken PendingQueue");
+    // assert(ReadyCycle <= CurrCycle && "Broken PendingQueue");
     break;
   case 1:
     if (ReadyCycle > NextCycle) {
@@ -3847,26 +3847,6 @@ void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
   }
 }
 
-/// Create the standard converging machine scheduler. This will be used as the
-/// default scheduler if the target does not set a default.
-ScheduleDAGMILive *llvm::createGenericSchedLive(MachineSchedContext *C) {
-  ScheduleDAGMILive *DAG =
-      new ScheduleDAGMILive(C, std::make_unique<GenericScheduler>(C));
-  // Register DAG post-processors.
-  //
-  // FIXME: extend the mutation API to allow earlier mutations to instantiate
-  // data and pass it to later mutations. Have a single mutation that gathers
-  // the interesting nodes in one pass.
-  DAG->addMutation(createCopyConstrainDAGMutation(DAG->TII, DAG->TRI));
-
-  const TargetSubtargetInfo &STI = C->MF->getSubtarget();
-  // Add MacroFusion mutation if fusions are not empty.
-  const auto &MacroFusions = STI.getMacroFusions();
-  if (!MacroFusions.empty())
-    DAG->addMutation(createMacroFusionDAGMutation(MacroFusions));
-  return DAG;
-}
-
 static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C) {
   return createGenericSchedLive(C);
 }
@@ -4139,18 +4119,6 @@ void PostGenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
   }
 }
 
-ScheduleDAGMI *llvm::createGenericSchedPostRA(MachineSchedContext *C) {
-  ScheduleDAGMI *DAG =
-      new ScheduleDAGMI(C, std::make_unique<PostGenericScheduler>(C),
-                        /*RemoveKillFlags=*/true);
-  const TargetSubtargetInfo &STI = C->MF->getSubtarget();
-  // Add MacroFusion mutation if fusions are not empty.
-  const auto &MacroFusions = STI.getMacroFusions();
-  if (!MacroFusions.empty())
-    DAG->addMutation(createMacroFusionDAGMutation(MacroFusions));
-  return DAG;
-}
-
 //===----------------------------------------------------------------------===//
 // ILP Scheduler. Currently for experimental analysis of heuristics.
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/RISCV/CMakeLists.txt b/llvm/lib/Target/RISCV/CMakeLists.txt
index 8715403f3839a..fe3f213b253f7 100644
--- a/llvm/lib/Target/RISCV/CMakeLists.txt
+++ b/llvm/lib/Target/RISCV/CMakeLists.txt
@@ -44,6 +44,7 @@ add_llvm_target(RISCVCodeGen
   RISCVISelDAGToDAG.cpp
   RISCVISelLowering.cpp
   RISCVMachineFunctionInfo.cpp
+  RISCVMachineScheduler.cpp
   RISCVMergeBaseOffset.cpp
   RISCVOptWInstrs.cpp
   RISCVPostRAExpandPseudoInsts.cpp
diff --git a/llvm/lib/Target/RISCV/RISCVMachineScheduler.cpp b/llvm/lib/Target/RISCV/RISCVMachineScheduler.cpp
new file mode 100644
index 0000000000000..d993d840c3d3a
--- /dev/null
+++ b/llvm/lib/Target/RISCV/RISCVMachineScheduler.cpp
@@ -0,0 +1,83 @@
+//===- RISCVMachineScheduler.cpp - MI Scheduler for RISC-V ----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "RISCVMachineScheduler.h"
+#include "MCTargetDesc/RISCVBaseInfo.h"
+#include "MCTargetDesc/RISCVMCTargetDesc.h"
+#include "RISCVInstrInfo.h"
+#include "RISCVSubtarget.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineScheduler.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/TargetParser/RISCVTargetParser.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "riscv-prera-sched-strategy"
+
+static cl::opt<bool> EnableScheduleSameVType(
+    "riscv-enable-schedule-same-vtype", cl::init(false), cl::Hidden,
+    cl::desc("Enable scheduling RVV instructions with same vtype first"));
+
+SUnit *RISCVPreRAMachineSchedStrategy::pickNode(bool &IsTopNode) {
+  if (EnableScheduleSameVType) {
+    for (SUnit *SU : Bot.Available) {
+      MachineInstr *MI = SU->getInstr();
+      const MCInstrDesc &Desc = MI->getDesc();
+      if (RISCVII::hasSEWOp(Desc.TSFlags)) {
+        unsigned CurVSEW = MI->getOperand(RISCVII::getSEWOpNum(Desc)).getImm();
+        RISCVII::VLMUL CurVLMUL = RISCVII::getLMul(Desc.TSFlags);
+        if (CurVSEW == PrevVSEW && CurVLMUL == PrevVLMUL) {
+          Bot.removeReady(SU);
+          IsTopNode = true;
+          return SU;
+        }
+      }
+    }
+    for (SUnit *SU : Bot.Pending) {
+      MachineInstr *MI = SU->getInstr();
+      const MCInstrDesc &Desc = MI->getDesc();
+      if (RISCVII::hasSEWOp(Desc.TSFlags)) {
+        unsigned CurVSEW = MI->getOperand(RISCVII::getSEWOpNum(Desc)).getImm();
+        RISCVII::VLMUL CurVLMUL = RISCVII::getLMul(Desc.TSFlags);
+        if (CurVSEW == PrevVSEW && CurVLMUL == PrevVLMUL) {
+          Bot.removeReady(SU);
+          IsTopNode = false;
+          return SU;
+        }
+      }
+    }
+  }
+  return GenericScheduler::pickNode(IsTopNode);
+}
+
+bool RISCVPreRAMachineSchedStrategy::tryCandidate(SchedCandidate &Cand,
+                                                  SchedCandidate &TryCand,
+                                                  SchedBoundary *Zone) const {
+  bool OriginalResult = GenericScheduler::tryCandidate(Cand, TryCand, Zone);
+
+  return OriginalResult;
+}
+
+void RISCVPreRAMachineSchedStrategy::schedNode(SUnit *SU, bool IsTopNode) {
+  GenericScheduler::schedNode(SU, IsTopNode);
+  MachineInstr *MI = SU->getInstr();
+  const MCInstrDesc &Desc = MI->getDesc();
+  if (RISCVII::hasSEWOp(Desc.TSFlags)) {
+    PrevVSEW = MI->getOperand(RISCVII::getSEWOpNum(Desc)).getImm();
+    PrevVLMUL = RISCVII::getLMul(Desc.TSFlags);
+  }
+  LLVM_DEBUG(dbgs() << "Previous scheduled Unit: ";
+             dbgs() << "SU(" << SU->NodeNum << ") - "; SU->getInstr()->dump(););
+  LLVM_DEBUG(dbgs() << "Previous VSEW : " << (1 << PrevVSEW) << "\n";
+             auto LMUL = RISCVVType::decodeVLMUL(PrevVLMUL);
+             dbgs() << "Previous VLMUL: m" << (LMUL.second ? "f" : "")
+                    << LMUL.first << "\n";);
+}
diff --git a/llvm/lib/Target/RISCV/RISCVMachineScheduler.h b/llvm/lib/Target/RISCV/RISCVMachineScheduler.h
new file mode 100644
index 0000000000000..bd806cef57dcb
--- /dev/null
+++ b/llvm/lib/Target/RISCV/RISCVMachineScheduler.h
@@ -0,0 +1,42 @@
+//===--- RISCVMachineScheduler.h - Custom RISC-V MI scheduler ---*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Custom RISC-V MI scheduler.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_RISCV_RISCVMACHINESCHEDULER_H
+#define LLVM_LIB_TARGET_RISCV_RISCVMACHINESCHEDULER_H
+
+#include "llvm/CodeGen/MachineScheduler.h"
+#include "llvm/TargetParser/RISCVTargetParser.h"
+
+namespace llvm {
+
+/// A GenericScheduler implementation for RISCV pre RA scheduling.
+class RISCVPreRAMachineSchedStrategy : public GenericScheduler {
+private:
+  RISCVII::VLMUL PrevVLMUL;
+  unsigned PrevVSEW;
+
+public:
+  RISCVPreRAMachineSchedStrategy(const MachineSchedContext *C)
+      : GenericScheduler(C) {}
+
+protected:
+  SUnit *pickNode(bool &IsTopNode) override;
+
+  bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand,
+                    SchedBoundary *Zone) const override;
+
+  void schedNode(SUnit *SU, bool IsTopNode) override;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
index 35d0b3408d09f..e0dcbbddc3f53 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -14,6 +14,7 @@
 #include "MCTargetDesc/RISCVBaseInfo.h"
 #include "RISCV.h"
 #include "RISCVMachineFunctionInfo.h"
+#include "RISCVMachineScheduler.h"
 #include "RISCVTargetObjectFile.h"
 #include "RISCVTargetTransformInfo.h"
 #include "TargetInfo/RISCVTargetInfo.h"
@@ -340,12 +341,11 @@ class RISCVPassConfig : public TargetPassConfig {
 
   ScheduleDAGInstrs *
   createMachineScheduler(MachineSchedContext *C) const override {
-    ScheduleDAGMILive *DAG = nullptr;
-    if (EnableMISchedLoadClustering) {
-      DAG = createGenericSchedLive(C);
+    ScheduleDAGMILive *DAG =
+        createGenericSchedLive<RISCVPreRAMachineSchedStrategy>(C);
+    if (EnableMISchedLoadClustering)
       DAG->addMutation(createLoadClusterDAGMutation(
           DAG->TII, DAG->TRI, /*ReorderWhileClustering=*/true));
-    }
     return DAG;
   }
 
diff --git a/llvm/test/CodeGen/RISCV/rvv/schedule.ll b/llvm/test/CodeGen/RISCV/rvv/schedule.ll
new file mode 100644
index 0000000000000..baf15ef400df5
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/schedule.ll
@@ -0,0 +1,49 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=riscv64 -mcpu=sifive-x280 -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s --check-prefix=DEFAULT
+; RUN: llc -mtriple=riscv64 -mcpu=sifive-x280 -riscv-enable-schedule-same-vtype -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s --check-prefix=SAME-VTYPE-FIRST
+
+define <vscale x 1 x i64> @test(<vscale x 1 x i64> %v64_0, <vscale x 1 x i64> %v64_1, <vscale x 1 x i32> %v32_0, <vscale x 1 x i32> %v32_1) {
+; DEFAULT-LABEL: test:
+; DEFAULT:       # %bb.0: # %entry
+; DEFAULT-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
+; DEFAULT-NEXT:    vdiv.vv v12, v8, v9
+; DEFAULT-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; DEFAULT-NEXT:    vdiv.vv v13, v10, v11
+; DEFAULT-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
+; DEFAULT-NEXT:    vadd.vv v8, v8, v9
+; DEFAULT-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; DEFAULT-NEXT:    vadd.vv v9, v10, v11
+; DEFAULT-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
+; DEFAULT-NEXT:    vadd.vv v8, v8, v12
+; DEFAULT-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; DEFAULT-NEXT:    vadd.vv v9, v9, v13
+; DEFAULT-NEXT:    vwadd.wv v8, v8, v9
+; DEFAULT-NEXT:    ret
+;
+; SAME-VTYPE-FIRST-LABEL: test:
+; SAME-VTYPE-FIRST:       # %bb.0: # %entry
+; SAME-VTYPE-FIRST-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
+; SAME-VTYPE-FIRST-NEXT:    vadd.vv v12, v8, v9
+; SAME-VTYPE-FIRST-NEXT:    vdiv.vv v8, v8, v9
+; SAME-VTYPE-FIRST-NEXT:    vadd.vv v8, v12, v8
+; SAME-VTYPE-FIRST-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; SAME-VTYPE-FIRST-NEXT:    vadd.vv v9, v10, v11
+; SAME-VTYPE-FIRST-NEXT:    vdiv.vv v10, v10, v11
+; SAME-VTYPE-FIRST-NEXT:    vadd.vv v9, v9, v10
+; SAME-VTYPE-FIRST-NEXT:    vwadd.wv v8, v8, v9
+; SAME-VTYPE-FIRST-NEXT:    ret
+entry:
+  %0 = add <vscale x 1 x i64> %v64_0, %v64_1
+  %1 = add <vscale x 1 x i32> %v32_0, %v32_1
+  %2 = sdiv <vscale x 1 x i64> %v64_0, %v64_1
+  %3 = sdiv <vscale x 1 x i32> %v32_0, %v32_1
+  %4 = add <vscale x 1 x i64> %0, %2
+  %5 = add <vscale x 1 x i32> %1, %3
+
+  %6 = sext <vscale x 1 x i32> %5 to <vscale x 1 x i64>
+  %7 = add <vscale x 1 x i64> %4, %6
+  ret <vscale x 1 x i64> %7
+}
+

>From 185e0f8266c2dec9a161328c6c14490fe3cffa69 Mon Sep 17 00:00:00 2001
From: Wang Pengcheng <wangpengcheng.pp at bytedance.com>
Date: Wed, 19 Jun 2024 18:45:50 +0800
Subject: [PATCH 2/2] Support bottomup/topdown/bidirectional and fix some
 failures

Created using spr 1.3.6-beta.1
---
 .../Target/RISCV/RISCVMachineScheduler.cpp    |  79 ++++++++---
 llvm/test/CodeGen/RISCV/rvv/schedule.ll       | 125 +++++++++++++++---
 2 files changed, 165 insertions(+), 39 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVMachineScheduler.cpp b/llvm/lib/Target/RISCV/RISCVMachineScheduler.cpp
index d993d840c3d3a..530d4f6b2d845 100644
--- a/llvm/lib/Target/RISCV/RISCVMachineScheduler.cpp
+++ b/llvm/lib/Target/RISCV/RISCVMachineScheduler.cpp
@@ -27,34 +27,68 @@ static cl::opt<bool> EnableScheduleSameVType(
     cl::desc("Enable scheduling RVV instructions with same vtype first"));
 
 SUnit *RISCVPreRAMachineSchedStrategy::pickNode(bool &IsTopNode) {
-  if (EnableScheduleSameVType) {
-    for (SUnit *SU : Bot.Available) {
+  auto FindPotentialRVVInstructionInQueue =
+      [&](SchedBoundary &Boundary, ReadyQueue Q, bool ShouldBeTop) -> SUnit * {
+    for (SUnit *SU : Q) {
+      if (SU->isScheduled)
+        continue;
+
       MachineInstr *MI = SU->getInstr();
       const MCInstrDesc &Desc = MI->getDesc();
       if (RISCVII::hasSEWOp(Desc.TSFlags)) {
         unsigned CurVSEW = MI->getOperand(RISCVII::getSEWOpNum(Desc)).getImm();
         RISCVII::VLMUL CurVLMUL = RISCVII::getLMul(Desc.TSFlags);
+        // FIXME: We should consider vl and policy here.
         if (CurVSEW == PrevVSEW && CurVLMUL == PrevVLMUL) {
-          Bot.removeReady(SU);
-          IsTopNode = true;
+          IsTopNode = ShouldBeTop;
+          // Boundary.removeReady(SU);
+          if (SU->isTopReady())
+            Top.removeReady(SU);
+          if (SU->isBottomReady())
+            Bot.removeReady(SU);
+          LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
+                            << *SU->getInstr());
           return SU;
         }
       }
     }
-    for (SUnit *SU : Bot.Pending) {
-      MachineInstr *MI = SU->getInstr();
-      const MCInstrDesc &Desc = MI->getDesc();
-      if (RISCVII::hasSEWOp(Desc.TSFlags)) {
-        unsigned CurVSEW = MI->getOperand(RISCVII::getSEWOpNum(Desc)).getImm();
-        RISCVII::VLMUL CurVLMUL = RISCVII::getLMul(Desc.TSFlags);
-        if (CurVSEW == PrevVSEW && CurVLMUL == PrevVLMUL) {
-          Bot.removeReady(SU);
-          IsTopNode = false;
-          return SU;
-        }
-      }
+    return nullptr;
+  };
+
+  auto FindPotentialRVVInstruction = [&](SchedBoundary &Boundary,
+                                         bool ShouldBeTop) -> SUnit * {
+    if (SUnit *Available = FindPotentialRVVInstructionInQueue(
+            Boundary, Boundary.Available, ShouldBeTop))
+      return Available;
+    if (SUnit *Pending = FindPotentialRVVInstructionInQueue(
+            Boundary, Boundary.Pending, ShouldBeTop))
+      return Pending;
+    return nullptr;
+  };
+
+  if (EnableScheduleSameVType) {
+    if (RegionPolicy.OnlyBottomUp) {
+      if (SUnit *SU = FindPotentialRVVInstruction(Bot, false))
+        return SU;
+    } else if (RegionPolicy.OnlyTopDown) {
+      if (SUnit *SU = FindPotentialRVVInstruction(Top, true))
+        return SU;
+    } else {
+      if (SUnit *SU =
+              FindPotentialRVVInstructionInQueue(Bot, Bot.Available, false))
+        return SU;
+      if (SUnit *SU =
+              FindPotentialRVVInstructionInQueue(Top, Top.Available, true))
+        return SU;
+      if (SUnit *SU =
+              FindPotentialRVVInstructionInQueue(Bot, Bot.Pending, false))
+        return SU;
+      if (SUnit *SU =
+              FindPotentialRVVInstructionInQueue(Top, Top.Pending, true))
+        return SU;
     }
   }
+
   return GenericScheduler::pickNode(IsTopNode);
 }
 
@@ -73,11 +107,12 @@ void RISCVPreRAMachineSchedStrategy::schedNode(SUnit *SU, bool IsTopNode) {
   if (RISCVII::hasSEWOp(Desc.TSFlags)) {
     PrevVSEW = MI->getOperand(RISCVII::getSEWOpNum(Desc)).getImm();
     PrevVLMUL = RISCVII::getLMul(Desc.TSFlags);
+    LLVM_DEBUG(dbgs() << "Previous scheduled Unit: ";
+               dbgs() << "SU(" << SU->NodeNum << ") - ";
+               SU->getInstr()->dump(););
+    LLVM_DEBUG(dbgs() << "Previous VSEW : " << (1 << PrevVSEW) << "\n";
+               auto LMUL = RISCVVType::decodeVLMUL(PrevVLMUL);
+               dbgs() << "Previous VLMUL: m" << (LMUL.second ? "f" : "")
+                      << LMUL.first << "\n";);
   }
-  LLVM_DEBUG(dbgs() << "Previous scheduled Unit: ";
-             dbgs() << "SU(" << SU->NodeNum << ") - "; SU->getInstr()->dump(););
-  LLVM_DEBUG(dbgs() << "Previous VSEW : " << (1 << PrevVSEW) << "\n";
-             auto LMUL = RISCVVType::decodeVLMUL(PrevVLMUL);
-             dbgs() << "Previous VLMUL: m" << (LMUL.second ? "f" : "")
-                    << LMUL.first << "\n";);
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/schedule.ll b/llvm/test/CodeGen/RISCV/rvv/schedule.ll
index baf15ef400df5..6b466d802ac4a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/schedule.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/schedule.ll
@@ -1,15 +1,33 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc -mtriple=riscv64 -mcpu=sifive-x280 -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s --check-prefix=DEFAULT
-; RUN: llc -mtriple=riscv64 -mcpu=sifive-x280 -riscv-enable-schedule-same-vtype -verify-machineinstrs < %s \
-; RUN:   | FileCheck %s --check-prefix=SAME-VTYPE-FIRST
+; RUN: llc -mtriple=riscv64 -mcpu=sifive-x280 -misched-bottomup=true -misched-topdown=false \
+; RUN:   -riscv-enable-schedule-same-vtype -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s --check-prefix=SAME-VTYPE-FIRST-BOTTOMUP
+; RUN: llc -mtriple=riscv64 -mcpu=sifive-x280 -misched-bottomup=false -misched-topdown=true \
+; RUN:   -riscv-enable-schedule-same-vtype -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s --check-prefix=SAME-VTYPE-FIRST-TOPDOWN
+; RUN: llc -mtriple=riscv64 -mcpu=sifive-x280 -misched-bottomup=false -misched-topdown=false \
+; RUN:   -riscv-enable-schedule-same-vtype -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s --check-prefix=SAME-VTYPE-FIRST-BIDIRECTIONAL
 
-define <vscale x 1 x i64> @test(<vscale x 1 x i64> %v64_0, <vscale x 1 x i64> %v64_1, <vscale x 1 x i32> %v32_0, <vscale x 1 x i32> %v32_1) {
+declare void @consume(i64 %scalar, <vscale x 1 x i64> %vector)
+
+define void @test(i64 %a, i64 %b, <vscale x 1 x i64> %v64_0, <vscale x 1 x i64> %v64_1, <vscale x 1 x i32> %v32_0, <vscale x 1 x i32> %v32_1) {
 ; DEFAULT-LABEL: test:
 ; DEFAULT:       # %bb.0: # %entry
-; DEFAULT-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
+; DEFAULT-NEXT:    addi sp, sp, -16
+; DEFAULT-NEXT:    .cfi_def_cfa_offset 16
+; DEFAULT-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; DEFAULT-NEXT:    .cfi_offset ra, -8
+; DEFAULT-NEXT:    vsetvli a2, zero, e64, m1, ta, ma
 ; DEFAULT-NEXT:    vdiv.vv v12, v8, v9
 ; DEFAULT-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; DEFAULT-NEXT:    div a2, a0, a1
+; DEFAULT-NEXT:    add a3, a0, a1
+; DEFAULT-NEXT:    mul a0, a0, a1
+; DEFAULT-NEXT:    add a0, a0, a3
+; DEFAULT-NEXT:    add a0, a0, a2
 ; DEFAULT-NEXT:    vdiv.vv v13, v10, v11
 ; DEFAULT-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
 ; DEFAULT-NEXT:    vadd.vv v8, v8, v9
@@ -20,30 +38,103 @@ define <vscale x 1 x i64> @test(<vscale x 1 x i64> %v64_0, <vscale x 1 x i64> %v
 ; DEFAULT-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
 ; DEFAULT-NEXT:    vadd.vv v9, v9, v13
 ; DEFAULT-NEXT:    vwadd.wv v8, v8, v9
+; DEFAULT-NEXT:    call consume
+; DEFAULT-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; DEFAULT-NEXT:    addi sp, sp, 16
 ; DEFAULT-NEXT:    ret
 ;
-; SAME-VTYPE-FIRST-LABEL: test:
-; SAME-VTYPE-FIRST:       # %bb.0: # %entry
-; SAME-VTYPE-FIRST-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
-; SAME-VTYPE-FIRST-NEXT:    vadd.vv v12, v8, v9
-; SAME-VTYPE-FIRST-NEXT:    vdiv.vv v8, v8, v9
-; SAME-VTYPE-FIRST-NEXT:    vadd.vv v8, v12, v8
-; SAME-VTYPE-FIRST-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; SAME-VTYPE-FIRST-NEXT:    vadd.vv v9, v10, v11
-; SAME-VTYPE-FIRST-NEXT:    vdiv.vv v10, v10, v11
-; SAME-VTYPE-FIRST-NEXT:    vadd.vv v9, v9, v10
-; SAME-VTYPE-FIRST-NEXT:    vwadd.wv v8, v8, v9
-; SAME-VTYPE-FIRST-NEXT:    ret
+; SAME-VTYPE-FIRST-BOTTOMUP-LABEL: test:
+; SAME-VTYPE-FIRST-BOTTOMUP:       # %bb.0: # %entry
+; SAME-VTYPE-FIRST-BOTTOMUP-NEXT:    addi sp, sp, -16
+; SAME-VTYPE-FIRST-BOTTOMUP-NEXT:    .cfi_def_cfa_offset 16
+; SAME-VTYPE-FIRST-BOTTOMUP-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; SAME-VTYPE-FIRST-BOTTOMUP-NEXT:    .cfi_offset ra, -8
+; SAME-VTYPE-FIRST-BOTTOMUP-NEXT:    vsetvli a2, zero, e64, m1, ta, ma
+; SAME-VTYPE-FIRST-BOTTOMUP-NEXT:    vadd.vv v12, v8, v9
+; SAME-VTYPE-FIRST-BOTTOMUP-NEXT:    div a2, a0, a1
+; SAME-VTYPE-FIRST-BOTTOMUP-NEXT:    add a3, a0, a1
+; SAME-VTYPE-FIRST-BOTTOMUP-NEXT:    vdiv.vv v8, v8, v9
+; SAME-VTYPE-FIRST-BOTTOMUP-NEXT:    mul a0, a0, a1
+; SAME-VTYPE-FIRST-BOTTOMUP-NEXT:    add a0, a0, a3
+; SAME-VTYPE-FIRST-BOTTOMUP-NEXT:    add a0, a0, a2
+; SAME-VTYPE-FIRST-BOTTOMUP-NEXT:    vadd.vv v8, v12, v8
+; SAME-VTYPE-FIRST-BOTTOMUP-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; SAME-VTYPE-FIRST-BOTTOMUP-NEXT:    vadd.vv v9, v10, v11
+; SAME-VTYPE-FIRST-BOTTOMUP-NEXT:    vdiv.vv v10, v10, v11
+; SAME-VTYPE-FIRST-BOTTOMUP-NEXT:    vadd.vv v9, v9, v10
+; SAME-VTYPE-FIRST-BOTTOMUP-NEXT:    vwadd.wv v8, v8, v9
+; SAME-VTYPE-FIRST-BOTTOMUP-NEXT:    call consume
+; SAME-VTYPE-FIRST-BOTTOMUP-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; SAME-VTYPE-FIRST-BOTTOMUP-NEXT:    addi sp, sp, 16
+; SAME-VTYPE-FIRST-BOTTOMUP-NEXT:    ret
+;
+; SAME-VTYPE-FIRST-TOPDOWN-LABEL: test:
+; SAME-VTYPE-FIRST-TOPDOWN:       # %bb.0: # %entry
+; SAME-VTYPE-FIRST-TOPDOWN-NEXT:    addi sp, sp, -16
+; SAME-VTYPE-FIRST-TOPDOWN-NEXT:    .cfi_def_cfa_offset 16
+; SAME-VTYPE-FIRST-TOPDOWN-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; SAME-VTYPE-FIRST-TOPDOWN-NEXT:    .cfi_offset ra, -8
+; SAME-VTYPE-FIRST-TOPDOWN-NEXT:    vsetvli a3, zero, e32, mf2, ta, ma
+; SAME-VTYPE-FIRST-TOPDOWN-NEXT:    vadd.vv v12, v10, v11
+; SAME-VTYPE-FIRST-TOPDOWN-NEXT:    vdiv.vv v10, v10, v11
+; SAME-VTYPE-FIRST-TOPDOWN-NEXT:    add a2, a0, a1
+; SAME-VTYPE-FIRST-TOPDOWN-NEXT:    mul a3, a0, a1
+; SAME-VTYPE-FIRST-TOPDOWN-NEXT:    div a0, a0, a1
+; SAME-VTYPE-FIRST-TOPDOWN-NEXT:    add a2, a2, a3
+; SAME-VTYPE-FIRST-TOPDOWN-NEXT:    vadd.vv v10, v12, v10
+; SAME-VTYPE-FIRST-TOPDOWN-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
+; SAME-VTYPE-FIRST-TOPDOWN-NEXT:    vadd.vv v11, v8, v9
+; SAME-VTYPE-FIRST-TOPDOWN-NEXT:    vdiv.vv v8, v8, v9
+; SAME-VTYPE-FIRST-TOPDOWN-NEXT:    add a0, a0, a2
+; SAME-VTYPE-FIRST-TOPDOWN-NEXT:    vadd.vv v8, v11, v8
+; SAME-VTYPE-FIRST-TOPDOWN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; SAME-VTYPE-FIRST-TOPDOWN-NEXT:    vwadd.wv v8, v8, v10
+; SAME-VTYPE-FIRST-TOPDOWN-NEXT:    call consume
+; SAME-VTYPE-FIRST-TOPDOWN-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; SAME-VTYPE-FIRST-TOPDOWN-NEXT:    addi sp, sp, 16
+; SAME-VTYPE-FIRST-TOPDOWN-NEXT:    ret
+;
+; SAME-VTYPE-FIRST-BIDIRECTIONAL-LABEL: test:
+; SAME-VTYPE-FIRST-BIDIRECTIONAL:       # %bb.0: # %entry
+; SAME-VTYPE-FIRST-BIDIRECTIONAL-NEXT:    addi sp, sp, -16
+; SAME-VTYPE-FIRST-BIDIRECTIONAL-NEXT:    .cfi_def_cfa_offset 16
+; SAME-VTYPE-FIRST-BIDIRECTIONAL-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; SAME-VTYPE-FIRST-BIDIRECTIONAL-NEXT:    .cfi_offset ra, -8
+; SAME-VTYPE-FIRST-BIDIRECTIONAL-NEXT:    vsetvli a2, zero, e32, mf2, ta, ma
+; SAME-VTYPE-FIRST-BIDIRECTIONAL-NEXT:    vadd.vv v12, v10, v11
+; SAME-VTYPE-FIRST-BIDIRECTIONAL-NEXT:    vdiv.vv v10, v10, v11
+; SAME-VTYPE-FIRST-BIDIRECTIONAL-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
+; SAME-VTYPE-FIRST-BIDIRECTIONAL-NEXT:    div a2, a0, a1
+; SAME-VTYPE-FIRST-BIDIRECTIONAL-NEXT:    add a3, a0, a1
+; SAME-VTYPE-FIRST-BIDIRECTIONAL-NEXT:    mul a0, a0, a1
+; SAME-VTYPE-FIRST-BIDIRECTIONAL-NEXT:    add a0, a0, a3
+; SAME-VTYPE-FIRST-BIDIRECTIONAL-NEXT:    vadd.vv v11, v8, v9
+; SAME-VTYPE-FIRST-BIDIRECTIONAL-NEXT:    vdiv.vv v8, v8, v9
+; SAME-VTYPE-FIRST-BIDIRECTIONAL-NEXT:    add a0, a0, a2
+; SAME-VTYPE-FIRST-BIDIRECTIONAL-NEXT:    vadd.vv v8, v11, v8
+; SAME-VTYPE-FIRST-BIDIRECTIONAL-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; SAME-VTYPE-FIRST-BIDIRECTIONAL-NEXT:    vadd.vv v9, v12, v10
+; SAME-VTYPE-FIRST-BIDIRECTIONAL-NEXT:    vwadd.wv v8, v8, v9
+; SAME-VTYPE-FIRST-BIDIRECTIONAL-NEXT:    call consume
+; SAME-VTYPE-FIRST-BIDIRECTIONAL-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; SAME-VTYPE-FIRST-BIDIRECTIONAL-NEXT:    addi sp, sp, 16
+; SAME-VTYPE-FIRST-BIDIRECTIONAL-NEXT:    ret
 entry:
   %0 = add <vscale x 1 x i64> %v64_0, %v64_1
+  %scalar0 = add i64 %a, %b
   %1 = add <vscale x 1 x i32> %v32_0, %v32_1
   %2 = sdiv <vscale x 1 x i64> %v64_0, %v64_1
+  %scalar1 = mul i64 %a, %b
   %3 = sdiv <vscale x 1 x i32> %v32_0, %v32_1
   %4 = add <vscale x 1 x i64> %0, %2
+  %scalar2 = sdiv i64 %a, %b
   %5 = add <vscale x 1 x i32> %1, %3
 
   %6 = sext <vscale x 1 x i32> %5 to <vscale x 1 x i64>
+  %scalar3 = add i64 %scalar0, %scalar1
   %7 = add <vscale x 1 x i64> %4, %6
-  ret <vscale x 1 x i64> %7
+  %scalar4 = add i64 %scalar2, %scalar3
+  call void @consume(i64 %scalar4, <vscale x 1 x i64> %7)
+  ret void
 }
 



More information about the llvm-commits mailing list