[llvm-branch-commits] [llvm] [RISCV] Schedule RVV instructions with compatible type first (PR #95924)

Pengcheng Wang via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Wed Jan 7 22:51:34 PST 2026


https://github.com/wangpc-pp updated https://github.com/llvm/llvm-project/pull/95924

>From 5ac4ff3040f8a5a6cc68efffe3349ef9d181ddec Mon Sep 17 00:00:00 2001
From: Wang Pengcheng <wangpengcheng.pp at bytedance.com>
Date: Tue, 18 Jun 2024 21:33:25 +0800
Subject: [PATCH 1/4] [𝘀𝗽𝗿] initial version
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Created using spr 1.3.6-beta.1
---
 llvm/include/llvm/CodeGen/MachineScheduler.h  | 43 ++++++++--
 llvm/lib/CodeGen/MachineScheduler.cpp         | 34 +-------
 llvm/lib/Target/RISCV/CMakeLists.txt          |  1 +
 .../Target/RISCV/RISCVMachineScheduler.cpp    | 83 +++++++++++++++++++
 llvm/lib/Target/RISCV/RISCVMachineScheduler.h | 42 ++++++++++
 llvm/lib/Target/RISCV/RISCVTargetMachine.cpp  |  8 +-
 llvm/test/CodeGen/RISCV/rvv/schedule.ll       | 49 +++++++++++
 7 files changed, 215 insertions(+), 45 deletions(-)
 create mode 100644 llvm/lib/Target/RISCV/RISCVMachineScheduler.cpp
 create mode 100644 llvm/lib/Target/RISCV/RISCVMachineScheduler.h
 create mode 100644 llvm/test/CodeGen/RISCV/rvv/schedule.ll

diff --git a/llvm/include/llvm/CodeGen/MachineScheduler.h b/llvm/include/llvm/CodeGen/MachineScheduler.h
index b15abf040058e..d1b5b83e5300b 100644
--- a/llvm/include/llvm/CodeGen/MachineScheduler.h
+++ b/llvm/include/llvm/CodeGen/MachineScheduler.h
@@ -1349,14 +1349,6 @@ class PostGenericScheduler : public GenericSchedulerBase {
   void pickNodeFromQueue(SchedBoundary &Zone, SchedCandidate &Cand);
 };
 
-/// Create the standard converging machine scheduler. This will be used as the
-/// default scheduler if the target does not set a default.
-/// Adds default DAG mutations.
-ScheduleDAGMILive *createGenericSchedLive(MachineSchedContext *C);
-
-/// Create a generic scheduler with no vreg liveness or DAG mutation passes.
-ScheduleDAGMI *createGenericSchedPostRA(MachineSchedContext *C);
-
 /// If ReorderWhileClustering is set to true, no attempt will be made to
 /// reduce reordering due to store clustering.
 std::unique_ptr<ScheduleDAGMutation>
@@ -1375,6 +1367,41 @@ std::unique_ptr<ScheduleDAGMutation>
 createCopyConstrainDAGMutation(const TargetInstrInfo *TII,
                                const TargetRegisterInfo *TRI);
 
+/// Create the standard converging machine scheduler. This will be used as the
+/// default scheduler if the target does not set a default.
+/// Adds default DAG mutations.
+template <typename Strategy = GenericScheduler>
+ScheduleDAGMILive *createGenericSchedLive(MachineSchedContext *C) {
+  ScheduleDAGMILive *DAG =
+      new ScheduleDAGMILive(C, std::make_unique<Strategy>(C));
+  // Register DAG post-processors.
+  //
+  // FIXME: extend the mutation API to allow earlier mutations to instantiate
+  // data and pass it to later mutations. Have a single mutation that gathers
+  // the interesting nodes in one pass.
+  DAG->addMutation(createCopyConstrainDAGMutation(DAG->TII, DAG->TRI));
+
+  const TargetSubtargetInfo &STI = C->MF->getSubtarget();
+  // Add MacroFusion mutation if fusions are not empty.
+  const auto &MacroFusions = STI.getMacroFusions();
+  if (!MacroFusions.empty())
+    DAG->addMutation(createMacroFusionDAGMutation(MacroFusions));
+  return DAG;
+}
+
+/// Create a generic scheduler with no vreg liveness or DAG mutation passes.
+template <typename Strategy = PostGenericScheduler>
+ScheduleDAGMI *createGenericSchedPostRA(MachineSchedContext *C) {
+  ScheduleDAGMI *DAG = new ScheduleDAGMI(C, std::make_unique<Strategy>(C),
+                                         /*RemoveKillFlags=*/true);
+  const TargetSubtargetInfo &STI = C->MF->getSubtarget();
+  // Add MacroFusion mutation if fusions are not empty.
+  const auto &MacroFusions = STI.getMacroFusions();
+  if (!MacroFusions.empty())
+    DAG->addMutation(createMacroFusionDAGMutation(MacroFusions));
+  return DAG;
+}
+
 } // end namespace llvm
 
 #endif // LLVM_CODEGEN_MACHINESCHEDULER_H
diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp
index cf72f74380835..ac792ad4d5484 100644
--- a/llvm/lib/CodeGen/MachineScheduler.cpp
+++ b/llvm/lib/CodeGen/MachineScheduler.cpp
@@ -2701,7 +2701,7 @@ void SchedBoundary::bumpNode(SUnit *SU) {
   unsigned NextCycle = CurrCycle;
   switch (SchedModel->getMicroOpBufferSize()) {
   case 0:
-    assert(ReadyCycle <= CurrCycle && "Broken PendingQueue");
+    // assert(ReadyCycle <= CurrCycle && "Broken PendingQueue");
     break;
   case 1:
     if (ReadyCycle > NextCycle) {
@@ -3847,26 +3847,6 @@ void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
   }
 }
 
-/// Create the standard converging machine scheduler. This will be used as the
-/// default scheduler if the target does not set a default.
-ScheduleDAGMILive *llvm::createGenericSchedLive(MachineSchedContext *C) {
-  ScheduleDAGMILive *DAG =
-      new ScheduleDAGMILive(C, std::make_unique<GenericScheduler>(C));
-  // Register DAG post-processors.
-  //
-  // FIXME: extend the mutation API to allow earlier mutations to instantiate
-  // data and pass it to later mutations. Have a single mutation that gathers
-  // the interesting nodes in one pass.
-  DAG->addMutation(createCopyConstrainDAGMutation(DAG->TII, DAG->TRI));
-
-  const TargetSubtargetInfo &STI = C->MF->getSubtarget();
-  // Add MacroFusion mutation if fusions are not empty.
-  const auto &MacroFusions = STI.getMacroFusions();
-  if (!MacroFusions.empty())
-    DAG->addMutation(createMacroFusionDAGMutation(MacroFusions));
-  return DAG;
-}
-
 static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C) {
   return createGenericSchedLive(C);
 }
@@ -4139,18 +4119,6 @@ void PostGenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
   }
 }
 
-ScheduleDAGMI *llvm::createGenericSchedPostRA(MachineSchedContext *C) {
-  ScheduleDAGMI *DAG =
-      new ScheduleDAGMI(C, std::make_unique<PostGenericScheduler>(C),
-                        /*RemoveKillFlags=*/true);
-  const TargetSubtargetInfo &STI = C->MF->getSubtarget();
-  // Add MacroFusion mutation if fusions are not empty.
-  const auto &MacroFusions = STI.getMacroFusions();
-  if (!MacroFusions.empty())
-    DAG->addMutation(createMacroFusionDAGMutation(MacroFusions));
-  return DAG;
-}
-
 //===----------------------------------------------------------------------===//
 // ILP Scheduler. Currently for experimental analysis of heuristics.
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/RISCV/CMakeLists.txt b/llvm/lib/Target/RISCV/CMakeLists.txt
index 8715403f3839a..fe3f213b253f7 100644
--- a/llvm/lib/Target/RISCV/CMakeLists.txt
+++ b/llvm/lib/Target/RISCV/CMakeLists.txt
@@ -44,6 +44,7 @@ add_llvm_target(RISCVCodeGen
   RISCVISelDAGToDAG.cpp
   RISCVISelLowering.cpp
   RISCVMachineFunctionInfo.cpp
+  RISCVMachineScheduler.cpp
   RISCVMergeBaseOffset.cpp
   RISCVOptWInstrs.cpp
   RISCVPostRAExpandPseudoInsts.cpp
diff --git a/llvm/lib/Target/RISCV/RISCVMachineScheduler.cpp b/llvm/lib/Target/RISCV/RISCVMachineScheduler.cpp
new file mode 100644
index 0000000000000..d993d840c3d3a
--- /dev/null
+++ b/llvm/lib/Target/RISCV/RISCVMachineScheduler.cpp
@@ -0,0 +1,83 @@
+//===- RISCVMachineScheduler.cpp - MI Scheduler for RISC-V ----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "RISCVMachineScheduler.h"
+#include "MCTargetDesc/RISCVBaseInfo.h"
+#include "MCTargetDesc/RISCVMCTargetDesc.h"
+#include "RISCVInstrInfo.h"
+#include "RISCVSubtarget.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineScheduler.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/TargetParser/RISCVTargetParser.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "riscv-prera-sched-strategy"
+
+static cl::opt<bool> EnableScheduleSameVType(
+    "riscv-enable-schedule-same-vtype", cl::init(false), cl::Hidden,
+    cl::desc("Enable scheduling RVV instructions with same vtype first"));
+
+SUnit *RISCVPreRAMachineSchedStrategy::pickNode(bool &IsTopNode) {
+  if (EnableScheduleSameVType) {
+    for (SUnit *SU : Bot.Available) {
+      MachineInstr *MI = SU->getInstr();
+      const MCInstrDesc &Desc = MI->getDesc();
+      if (RISCVII::hasSEWOp(Desc.TSFlags)) {
+        unsigned CurVSEW = MI->getOperand(RISCVII::getSEWOpNum(Desc)).getImm();
+        RISCVII::VLMUL CurVLMUL = RISCVII::getLMul(Desc.TSFlags);
+        if (CurVSEW == PrevVSEW && CurVLMUL == PrevVLMUL) {
+          Bot.removeReady(SU);
+          IsTopNode = true;
+          return SU;
+        }
+      }
+    }
+    for (SUnit *SU : Bot.Pending) {
+      MachineInstr *MI = SU->getInstr();
+      const MCInstrDesc &Desc = MI->getDesc();
+      if (RISCVII::hasSEWOp(Desc.TSFlags)) {
+        unsigned CurVSEW = MI->getOperand(RISCVII::getSEWOpNum(Desc)).getImm();
+        RISCVII::VLMUL CurVLMUL = RISCVII::getLMul(Desc.TSFlags);
+        if (CurVSEW == PrevVSEW && CurVLMUL == PrevVLMUL) {
+          Bot.removeReady(SU);
+          IsTopNode = false;
+          return SU;
+        }
+      }
+    }
+  }
+  return GenericScheduler::pickNode(IsTopNode);
+}
+
+bool RISCVPreRAMachineSchedStrategy::tryCandidate(SchedCandidate &Cand,
+                                                  SchedCandidate &TryCand,
+                                                  SchedBoundary *Zone) const {
+  bool OriginalResult = GenericScheduler::tryCandidate(Cand, TryCand, Zone);
+
+  return OriginalResult;
+}
+
+void RISCVPreRAMachineSchedStrategy::schedNode(SUnit *SU, bool IsTopNode) {
+  GenericScheduler::schedNode(SU, IsTopNode);
+  MachineInstr *MI = SU->getInstr();
+  const MCInstrDesc &Desc = MI->getDesc();
+  if (RISCVII::hasSEWOp(Desc.TSFlags)) {
+    PrevVSEW = MI->getOperand(RISCVII::getSEWOpNum(Desc)).getImm();
+    PrevVLMUL = RISCVII::getLMul(Desc.TSFlags);
+  }
+  LLVM_DEBUG(dbgs() << "Previous scheduled Unit: ";
+             dbgs() << "SU(" << SU->NodeNum << ") - "; SU->getInstr()->dump(););
+  LLVM_DEBUG(dbgs() << "Previous VSEW : " << (1 << PrevVSEW) << "\n";
+             auto LMUL = RISCVVType::decodeVLMUL(PrevVLMUL);
+             dbgs() << "Previous VLMUL: m" << (LMUL.second ? "f" : "")
+                    << LMUL.first << "\n";);
+}
diff --git a/llvm/lib/Target/RISCV/RISCVMachineScheduler.h b/llvm/lib/Target/RISCV/RISCVMachineScheduler.h
new file mode 100644
index 0000000000000..bd806cef57dcb
--- /dev/null
+++ b/llvm/lib/Target/RISCV/RISCVMachineScheduler.h
@@ -0,0 +1,42 @@
+//===--- RISCVMachineScheduler.h - Custom RISC-V MI scheduler ---*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Custom RISC-V MI scheduler.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_RISCV_RISCVMACHINESCHEDULER_H
+#define LLVM_LIB_TARGET_RISCV_RISCVMACHINESCHEDULER_H
+
+#include "llvm/CodeGen/MachineScheduler.h"
+#include "llvm/TargetParser/RISCVTargetParser.h"
+
+namespace llvm {
+
+/// A GenericScheduler implementation for RISCV pre RA scheduling.
+class RISCVPreRAMachineSchedStrategy : public GenericScheduler {
+private:
+  RISCVII::VLMUL PrevVLMUL;
+  unsigned PrevVSEW;
+
+public:
+  RISCVPreRAMachineSchedStrategy(const MachineSchedContext *C)
+      : GenericScheduler(C) {}
+
+protected:
+  SUnit *pickNode(bool &IsTopNode) override;
+
+  bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand,
+                    SchedBoundary *Zone) const override;
+
+  void schedNode(SUnit *SU, bool IsTopNode) override;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
index 35d0b3408d09f..e0dcbbddc3f53 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -14,6 +14,7 @@
 #include "MCTargetDesc/RISCVBaseInfo.h"
 #include "RISCV.h"
 #include "RISCVMachineFunctionInfo.h"
+#include "RISCVMachineScheduler.h"
 #include "RISCVTargetObjectFile.h"
 #include "RISCVTargetTransformInfo.h"
 #include "TargetInfo/RISCVTargetInfo.h"
@@ -340,12 +341,11 @@ class RISCVPassConfig : public TargetPassConfig {
 
   ScheduleDAGInstrs *
   createMachineScheduler(MachineSchedContext *C) const override {
-    ScheduleDAGMILive *DAG = nullptr;
-    if (EnableMISchedLoadClustering) {
-      DAG = createGenericSchedLive(C);
+    ScheduleDAGMILive *DAG =
+        createGenericSchedLive<RISCVPreRAMachineSchedStrategy>(C);
+    if (EnableMISchedLoadClustering)
       DAG->addMutation(createLoadClusterDAGMutation(
           DAG->TII, DAG->TRI, /*ReorderWhileClustering=*/true));
-    }
     return DAG;
   }
 
diff --git a/llvm/test/CodeGen/RISCV/rvv/schedule.ll b/llvm/test/CodeGen/RISCV/rvv/schedule.ll
new file mode 100644
index 0000000000000..baf15ef400df5
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/schedule.ll
@@ -0,0 +1,49 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=riscv64 -mcpu=sifive-x280 -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s --check-prefix=DEFAULT
+; RUN: llc -mtriple=riscv64 -mcpu=sifive-x280 -riscv-enable-schedule-same-vtype -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s --check-prefix=SAME-VTYPE-FIRST
+
+define <vscale x 1 x i64> @test(<vscale x 1 x i64> %v64_0, <vscale x 1 x i64> %v64_1, <vscale x 1 x i32> %v32_0, <vscale x 1 x i32> %v32_1) {
+; DEFAULT-LABEL: test:
+; DEFAULT:       # %bb.0: # %entry
+; DEFAULT-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
+; DEFAULT-NEXT:    vdiv.vv v12, v8, v9
+; DEFAULT-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; DEFAULT-NEXT:    vdiv.vv v13, v10, v11
+; DEFAULT-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
+; DEFAULT-NEXT:    vadd.vv v8, v8, v9
+; DEFAULT-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; DEFAULT-NEXT:    vadd.vv v9, v10, v11
+; DEFAULT-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
+; DEFAULT-NEXT:    vadd.vv v8, v8, v12
+; DEFAULT-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; DEFAULT-NEXT:    vadd.vv v9, v9, v13
+; DEFAULT-NEXT:    vwadd.wv v8, v8, v9
+; DEFAULT-NEXT:    ret
+;
+; SAME-VTYPE-FIRST-LABEL: test:
+; SAME-VTYPE-FIRST:       # %bb.0: # %entry
+; SAME-VTYPE-FIRST-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
+; SAME-VTYPE-FIRST-NEXT:    vadd.vv v12, v8, v9
+; SAME-VTYPE-FIRST-NEXT:    vdiv.vv v8, v8, v9
+; SAME-VTYPE-FIRST-NEXT:    vadd.vv v8, v12, v8
+; SAME-VTYPE-FIRST-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; SAME-VTYPE-FIRST-NEXT:    vadd.vv v9, v10, v11
+; SAME-VTYPE-FIRST-NEXT:    vdiv.vv v10, v10, v11
+; SAME-VTYPE-FIRST-NEXT:    vadd.vv v9, v9, v10
+; SAME-VTYPE-FIRST-NEXT:    vwadd.wv v8, v8, v9
+; SAME-VTYPE-FIRST-NEXT:    ret
+entry:
+  %0 = add <vscale x 1 x i64> %v64_0, %v64_1
+  %1 = add <vscale x 1 x i32> %v32_0, %v32_1
+  %2 = sdiv <vscale x 1 x i64> %v64_0, %v64_1
+  %3 = sdiv <vscale x 1 x i32> %v32_0, %v32_1
+  %4 = add <vscale x 1 x i64> %0, %2
+  %5 = add <vscale x 1 x i32> %1, %3
+
+  %6 = sext <vscale x 1 x i32> %5 to <vscale x 1 x i64>
+  %7 = add <vscale x 1 x i64> %4, %6
+  ret <vscale x 1 x i64> %7
+}
+

>From 185e0f8266c2dec9a161328c6c14490fe3cffa69 Mon Sep 17 00:00:00 2001
From: Wang Pengcheng <wangpengcheng.pp at bytedance.com>
Date: Wed, 19 Jun 2024 18:45:50 +0800
Subject: [PATCH 2/4] Support bottomup/topdown/bidirectional and fix some
 failures

Created using spr 1.3.6-beta.1
---
 .../Target/RISCV/RISCVMachineScheduler.cpp    |  79 ++++++++---
 llvm/test/CodeGen/RISCV/rvv/schedule.ll       | 125 +++++++++++++++---
 2 files changed, 165 insertions(+), 39 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVMachineScheduler.cpp b/llvm/lib/Target/RISCV/RISCVMachineScheduler.cpp
index d993d840c3d3a..530d4f6b2d845 100644
--- a/llvm/lib/Target/RISCV/RISCVMachineScheduler.cpp
+++ b/llvm/lib/Target/RISCV/RISCVMachineScheduler.cpp
@@ -27,34 +27,68 @@ static cl::opt<bool> EnableScheduleSameVType(
     cl::desc("Enable scheduling RVV instructions with same vtype first"));
 
 SUnit *RISCVPreRAMachineSchedStrategy::pickNode(bool &IsTopNode) {
-  if (EnableScheduleSameVType) {
-    for (SUnit *SU : Bot.Available) {
+  auto FindPotentialRVVInstructionInQueue =
+      [&](SchedBoundary &Boundary, ReadyQueue Q, bool ShouldBeTop) -> SUnit * {
+    for (SUnit *SU : Q) {
+      if (SU->isScheduled)
+        continue;
+
       MachineInstr *MI = SU->getInstr();
       const MCInstrDesc &Desc = MI->getDesc();
       if (RISCVII::hasSEWOp(Desc.TSFlags)) {
         unsigned CurVSEW = MI->getOperand(RISCVII::getSEWOpNum(Desc)).getImm();
         RISCVII::VLMUL CurVLMUL = RISCVII::getLMul(Desc.TSFlags);
+        // FIXME: We should consider vl and policy here.
         if (CurVSEW == PrevVSEW && CurVLMUL == PrevVLMUL) {
-          Bot.removeReady(SU);
-          IsTopNode = true;
+          IsTopNode = ShouldBeTop;
+          // Boundary.removeReady(SU);
+          if (SU->isTopReady())
+            Top.removeReady(SU);
+          if (SU->isBottomReady())
+            Bot.removeReady(SU);
+          LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
+                            << *SU->getInstr());
           return SU;
         }
       }
     }
-    for (SUnit *SU : Bot.Pending) {
-      MachineInstr *MI = SU->getInstr();
-      const MCInstrDesc &Desc = MI->getDesc();
-      if (RISCVII::hasSEWOp(Desc.TSFlags)) {
-        unsigned CurVSEW = MI->getOperand(RISCVII::getSEWOpNum(Desc)).getImm();
-        RISCVII::VLMUL CurVLMUL = RISCVII::getLMul(Desc.TSFlags);
-        if (CurVSEW == PrevVSEW && CurVLMUL == PrevVLMUL) {
-          Bot.removeReady(SU);
-          IsTopNode = false;
-          return SU;
-        }
-      }
+    return nullptr;
+  };
+
+  auto FindPotentialRVVInstruction = [&](SchedBoundary &Boundary,
+                                         bool ShouldBeTop) -> SUnit * {
+    if (SUnit *Available = FindPotentialRVVInstructionInQueue(
+            Boundary, Boundary.Available, ShouldBeTop))
+      return Available;
+    if (SUnit *Pending = FindPotentialRVVInstructionInQueue(
+            Boundary, Boundary.Pending, ShouldBeTop))
+      return Pending;
+    return nullptr;
+  };
+
+  if (EnableScheduleSameVType) {
+    if (RegionPolicy.OnlyBottomUp) {
+      if (SUnit *SU = FindPotentialRVVInstruction(Bot, false))
+        return SU;
+    } else if (RegionPolicy.OnlyTopDown) {
+      if (SUnit *SU = FindPotentialRVVInstruction(Top, true))
+        return SU;
+    } else {
+      if (SUnit *SU =
+              FindPotentialRVVInstructionInQueue(Bot, Bot.Available, false))
+        return SU;
+      if (SUnit *SU =
+              FindPotentialRVVInstructionInQueue(Top, Top.Available, true))
+        return SU;
+      if (SUnit *SU =
+              FindPotentialRVVInstructionInQueue(Bot, Bot.Pending, false))
+        return SU;
+      if (SUnit *SU =
+              FindPotentialRVVInstructionInQueue(Top, Top.Pending, true))
+        return SU;
     }
   }
+
   return GenericScheduler::pickNode(IsTopNode);
 }
 
@@ -73,11 +107,12 @@ void RISCVPreRAMachineSchedStrategy::schedNode(SUnit *SU, bool IsTopNode) {
   if (RISCVII::hasSEWOp(Desc.TSFlags)) {
     PrevVSEW = MI->getOperand(RISCVII::getSEWOpNum(Desc)).getImm();
     PrevVLMUL = RISCVII::getLMul(Desc.TSFlags);
+    LLVM_DEBUG(dbgs() << "Previous scheduled Unit: ";
+               dbgs() << "SU(" << SU->NodeNum << ") - ";
+               SU->getInstr()->dump(););
+    LLVM_DEBUG(dbgs() << "Previous VSEW : " << (1 << PrevVSEW) << "\n";
+               auto LMUL = RISCVVType::decodeVLMUL(PrevVLMUL);
+               dbgs() << "Previous VLMUL: m" << (LMUL.second ? "f" : "")
+                      << LMUL.first << "\n";);
   }
-  LLVM_DEBUG(dbgs() << "Previous scheduled Unit: ";
-             dbgs() << "SU(" << SU->NodeNum << ") - "; SU->getInstr()->dump(););
-  LLVM_DEBUG(dbgs() << "Previous VSEW : " << (1 << PrevVSEW) << "\n";
-             auto LMUL = RISCVVType::decodeVLMUL(PrevVLMUL);
-             dbgs() << "Previous VLMUL: m" << (LMUL.second ? "f" : "")
-                    << LMUL.first << "\n";);
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/schedule.ll b/llvm/test/CodeGen/RISCV/rvv/schedule.ll
index baf15ef400df5..6b466d802ac4a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/schedule.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/schedule.ll
@@ -1,15 +1,33 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc -mtriple=riscv64 -mcpu=sifive-x280 -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s --check-prefix=DEFAULT
-; RUN: llc -mtriple=riscv64 -mcpu=sifive-x280 -riscv-enable-schedule-same-vtype -verify-machineinstrs < %s \
-; RUN:   | FileCheck %s --check-prefix=SAME-VTYPE-FIRST
+; RUN: llc -mtriple=riscv64 -mcpu=sifive-x280 -misched-bottomup=true -misched-topdown=false \
+; RUN:   -riscv-enable-schedule-same-vtype -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s --check-prefix=SAME-VTYPE-FIRST-BOTTOMUP
+; RUN: llc -mtriple=riscv64 -mcpu=sifive-x280 -misched-bottomup=false -misched-topdown=true \
+; RUN:   -riscv-enable-schedule-same-vtype -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s --check-prefix=SAME-VTYPE-FIRST-TOPDOWN
+; RUN: llc -mtriple=riscv64 -mcpu=sifive-x280 -misched-bottomup=false -misched-topdown=false \
+; RUN:   -riscv-enable-schedule-same-vtype -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s --check-prefix=SAME-VTYPE-FIRST-BIDIRECTIONAL
 
-define <vscale x 1 x i64> @test(<vscale x 1 x i64> %v64_0, <vscale x 1 x i64> %v64_1, <vscale x 1 x i32> %v32_0, <vscale x 1 x i32> %v32_1) {
+declare void @consume(i64 %scalar, <vscale x 1 x i64> %vector)
+
+define void @test(i64 %a, i64 %b, <vscale x 1 x i64> %v64_0, <vscale x 1 x i64> %v64_1, <vscale x 1 x i32> %v32_0, <vscale x 1 x i32> %v32_1) {
 ; DEFAULT-LABEL: test:
 ; DEFAULT:       # %bb.0: # %entry
-; DEFAULT-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
+; DEFAULT-NEXT:    addi sp, sp, -16
+; DEFAULT-NEXT:    .cfi_def_cfa_offset 16
+; DEFAULT-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; DEFAULT-NEXT:    .cfi_offset ra, -8
+; DEFAULT-NEXT:    vsetvli a2, zero, e64, m1, ta, ma
 ; DEFAULT-NEXT:    vdiv.vv v12, v8, v9
 ; DEFAULT-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; DEFAULT-NEXT:    div a2, a0, a1
+; DEFAULT-NEXT:    add a3, a0, a1
+; DEFAULT-NEXT:    mul a0, a0, a1
+; DEFAULT-NEXT:    add a0, a0, a3
+; DEFAULT-NEXT:    add a0, a0, a2
 ; DEFAULT-NEXT:    vdiv.vv v13, v10, v11
 ; DEFAULT-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
 ; DEFAULT-NEXT:    vadd.vv v8, v8, v9
@@ -20,30 +38,103 @@ define <vscale x 1 x i64> @test(<vscale x 1 x i64> %v64_0, <vscale x 1 x i64> %v
 ; DEFAULT-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
 ; DEFAULT-NEXT:    vadd.vv v9, v9, v13
 ; DEFAULT-NEXT:    vwadd.wv v8, v8, v9
+; DEFAULT-NEXT:    call consume
+; DEFAULT-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; DEFAULT-NEXT:    addi sp, sp, 16
 ; DEFAULT-NEXT:    ret
 ;
-; SAME-VTYPE-FIRST-LABEL: test:
-; SAME-VTYPE-FIRST:       # %bb.0: # %entry
-; SAME-VTYPE-FIRST-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
-; SAME-VTYPE-FIRST-NEXT:    vadd.vv v12, v8, v9
-; SAME-VTYPE-FIRST-NEXT:    vdiv.vv v8, v8, v9
-; SAME-VTYPE-FIRST-NEXT:    vadd.vv v8, v12, v8
-; SAME-VTYPE-FIRST-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
-; SAME-VTYPE-FIRST-NEXT:    vadd.vv v9, v10, v11
-; SAME-VTYPE-FIRST-NEXT:    vdiv.vv v10, v10, v11
-; SAME-VTYPE-FIRST-NEXT:    vadd.vv v9, v9, v10
-; SAME-VTYPE-FIRST-NEXT:    vwadd.wv v8, v8, v9
-; SAME-VTYPE-FIRST-NEXT:    ret
+; SAME-VTYPE-FIRST-BOTTOMUP-LABEL: test:
+; SAME-VTYPE-FIRST-BOTTOMUP:       # %bb.0: # %entry
+; SAME-VTYPE-FIRST-BOTTOMUP-NEXT:    addi sp, sp, -16
+; SAME-VTYPE-FIRST-BOTTOMUP-NEXT:    .cfi_def_cfa_offset 16
+; SAME-VTYPE-FIRST-BOTTOMUP-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; SAME-VTYPE-FIRST-BOTTOMUP-NEXT:    .cfi_offset ra, -8
+; SAME-VTYPE-FIRST-BOTTOMUP-NEXT:    vsetvli a2, zero, e64, m1, ta, ma
+; SAME-VTYPE-FIRST-BOTTOMUP-NEXT:    vadd.vv v12, v8, v9
+; SAME-VTYPE-FIRST-BOTTOMUP-NEXT:    div a2, a0, a1
+; SAME-VTYPE-FIRST-BOTTOMUP-NEXT:    add a3, a0, a1
+; SAME-VTYPE-FIRST-BOTTOMUP-NEXT:    vdiv.vv v8, v8, v9
+; SAME-VTYPE-FIRST-BOTTOMUP-NEXT:    mul a0, a0, a1
+; SAME-VTYPE-FIRST-BOTTOMUP-NEXT:    add a0, a0, a3
+; SAME-VTYPE-FIRST-BOTTOMUP-NEXT:    add a0, a0, a2
+; SAME-VTYPE-FIRST-BOTTOMUP-NEXT:    vadd.vv v8, v12, v8
+; SAME-VTYPE-FIRST-BOTTOMUP-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; SAME-VTYPE-FIRST-BOTTOMUP-NEXT:    vadd.vv v9, v10, v11
+; SAME-VTYPE-FIRST-BOTTOMUP-NEXT:    vdiv.vv v10, v10, v11
+; SAME-VTYPE-FIRST-BOTTOMUP-NEXT:    vadd.vv v9, v9, v10
+; SAME-VTYPE-FIRST-BOTTOMUP-NEXT:    vwadd.wv v8, v8, v9
+; SAME-VTYPE-FIRST-BOTTOMUP-NEXT:    call consume
+; SAME-VTYPE-FIRST-BOTTOMUP-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; SAME-VTYPE-FIRST-BOTTOMUP-NEXT:    addi sp, sp, 16
+; SAME-VTYPE-FIRST-BOTTOMUP-NEXT:    ret
+;
+; SAME-VTYPE-FIRST-TOPDOWN-LABEL: test:
+; SAME-VTYPE-FIRST-TOPDOWN:       # %bb.0: # %entry
+; SAME-VTYPE-FIRST-TOPDOWN-NEXT:    addi sp, sp, -16
+; SAME-VTYPE-FIRST-TOPDOWN-NEXT:    .cfi_def_cfa_offset 16
+; SAME-VTYPE-FIRST-TOPDOWN-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; SAME-VTYPE-FIRST-TOPDOWN-NEXT:    .cfi_offset ra, -8
+; SAME-VTYPE-FIRST-TOPDOWN-NEXT:    vsetvli a3, zero, e32, mf2, ta, ma
+; SAME-VTYPE-FIRST-TOPDOWN-NEXT:    vadd.vv v12, v10, v11
+; SAME-VTYPE-FIRST-TOPDOWN-NEXT:    vdiv.vv v10, v10, v11
+; SAME-VTYPE-FIRST-TOPDOWN-NEXT:    add a2, a0, a1
+; SAME-VTYPE-FIRST-TOPDOWN-NEXT:    mul a3, a0, a1
+; SAME-VTYPE-FIRST-TOPDOWN-NEXT:    div a0, a0, a1
+; SAME-VTYPE-FIRST-TOPDOWN-NEXT:    add a2, a2, a3
+; SAME-VTYPE-FIRST-TOPDOWN-NEXT:    vadd.vv v10, v12, v10
+; SAME-VTYPE-FIRST-TOPDOWN-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
+; SAME-VTYPE-FIRST-TOPDOWN-NEXT:    vadd.vv v11, v8, v9
+; SAME-VTYPE-FIRST-TOPDOWN-NEXT:    vdiv.vv v8, v8, v9
+; SAME-VTYPE-FIRST-TOPDOWN-NEXT:    add a0, a0, a2
+; SAME-VTYPE-FIRST-TOPDOWN-NEXT:    vadd.vv v8, v11, v8
+; SAME-VTYPE-FIRST-TOPDOWN-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; SAME-VTYPE-FIRST-TOPDOWN-NEXT:    vwadd.wv v8, v8, v10
+; SAME-VTYPE-FIRST-TOPDOWN-NEXT:    call consume
+; SAME-VTYPE-FIRST-TOPDOWN-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; SAME-VTYPE-FIRST-TOPDOWN-NEXT:    addi sp, sp, 16
+; SAME-VTYPE-FIRST-TOPDOWN-NEXT:    ret
+;
+; SAME-VTYPE-FIRST-BIDIRECTIONAL-LABEL: test:
+; SAME-VTYPE-FIRST-BIDIRECTIONAL:       # %bb.0: # %entry
+; SAME-VTYPE-FIRST-BIDIRECTIONAL-NEXT:    addi sp, sp, -16
+; SAME-VTYPE-FIRST-BIDIRECTIONAL-NEXT:    .cfi_def_cfa_offset 16
+; SAME-VTYPE-FIRST-BIDIRECTIONAL-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; SAME-VTYPE-FIRST-BIDIRECTIONAL-NEXT:    .cfi_offset ra, -8
+; SAME-VTYPE-FIRST-BIDIRECTIONAL-NEXT:    vsetvli a2, zero, e32, mf2, ta, ma
+; SAME-VTYPE-FIRST-BIDIRECTIONAL-NEXT:    vadd.vv v12, v10, v11
+; SAME-VTYPE-FIRST-BIDIRECTIONAL-NEXT:    vdiv.vv v10, v10, v11
+; SAME-VTYPE-FIRST-BIDIRECTIONAL-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
+; SAME-VTYPE-FIRST-BIDIRECTIONAL-NEXT:    div a2, a0, a1
+; SAME-VTYPE-FIRST-BIDIRECTIONAL-NEXT:    add a3, a0, a1
+; SAME-VTYPE-FIRST-BIDIRECTIONAL-NEXT:    mul a0, a0, a1
+; SAME-VTYPE-FIRST-BIDIRECTIONAL-NEXT:    add a0, a0, a3
+; SAME-VTYPE-FIRST-BIDIRECTIONAL-NEXT:    vadd.vv v11, v8, v9
+; SAME-VTYPE-FIRST-BIDIRECTIONAL-NEXT:    vdiv.vv v8, v8, v9
+; SAME-VTYPE-FIRST-BIDIRECTIONAL-NEXT:    add a0, a0, a2
+; SAME-VTYPE-FIRST-BIDIRECTIONAL-NEXT:    vadd.vv v8, v11, v8
+; SAME-VTYPE-FIRST-BIDIRECTIONAL-NEXT:    vsetvli zero, zero, e32, mf2, ta, ma
+; SAME-VTYPE-FIRST-BIDIRECTIONAL-NEXT:    vadd.vv v9, v12, v10
+; SAME-VTYPE-FIRST-BIDIRECTIONAL-NEXT:    vwadd.wv v8, v8, v9
+; SAME-VTYPE-FIRST-BIDIRECTIONAL-NEXT:    call consume
+; SAME-VTYPE-FIRST-BIDIRECTIONAL-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; SAME-VTYPE-FIRST-BIDIRECTIONAL-NEXT:    addi sp, sp, 16
+; SAME-VTYPE-FIRST-BIDIRECTIONAL-NEXT:    ret
 entry:
   %0 = add <vscale x 1 x i64> %v64_0, %v64_1
+  %scalar0 = add i64 %a, %b
   %1 = add <vscale x 1 x i32> %v32_0, %v32_1
   %2 = sdiv <vscale x 1 x i64> %v64_0, %v64_1
+  %scalar1 = mul i64 %a, %b
   %3 = sdiv <vscale x 1 x i32> %v32_0, %v32_1
   %4 = add <vscale x 1 x i64> %0, %2
+  %scalar2 = sdiv i64 %a, %b
   %5 = add <vscale x 1 x i32> %1, %3
 
   %6 = sext <vscale x 1 x i32> %5 to <vscale x 1 x i64>
+  %scalar3 = add i64 %scalar0, %scalar1
   %7 = add <vscale x 1 x i64> %4, %6
-  ret <vscale x 1 x i64> %7
+  %scalar4 = add i64 %scalar2, %scalar3
+  call void @consume(i64 %scalar4, <vscale x 1 x i64> %7)
+  ret void
 }
 

>From b86faa86e6e8e661e460ccbe20048141ff0b2c13 Mon Sep 17 00:00:00 2001
From: Wang Pengcheng <wangpengcheng.pp at bytedance.com>
Date: Tue, 2 Dec 2025 17:00:12 +0800
Subject: [PATCH 3/4] Revert llvm/lib/CodeGen/MachineScheduler.cpp change

Created using spr 1.3.6-beta.1
---
 llvm/lib/CodeGen/MachineScheduler.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp
index efe64788c5efa..de29a9fab876e 100644
--- a/llvm/lib/CodeGen/MachineScheduler.cpp
+++ b/llvm/lib/CodeGen/MachineScheduler.cpp
@@ -2964,7 +2964,7 @@ void SchedBoundary::bumpNode(SUnit *SU) {
   unsigned NextCycle = CurrCycle;
   switch (SchedModel->getMicroOpBufferSize()) {
   case 0:
-    // assert(ReadyCycle <= CurrCycle && "Broken PendingQueue");
+    assert(ReadyCycle <= CurrCycle && "Broken PendingQueue");
     break;
   case 1:
     if (ReadyCycle > NextCycle) {

>From d1578900fc951d219904b3cde2d9f7ae46bce381 Mon Sep 17 00:00:00 2001
From: Wang Pengcheng <wangpengcheng.pp at bytedance.com>
Date: Sun, 4 Jan 2026 14:44:13 +0800
Subject: [PATCH 4/4] Remove private, add EnableVTypeSchedHeuristic option and
 rework tests

Created using spr 1.3.6-beta.1
---
 .../Target/RISCV/RISCVMachineScheduler.cpp    |  10 +-
 llvm/lib/Target/RISCV/RISCVMachineScheduler.h |   8 +-
 llvm/lib/Target/RISCV/RISCVProcessors.td      |   4 +-
 llvm/lib/Target/RISCV/RISCVSubtarget.h        |   6 +
 .../RISCV/rvv/rvv-vtype-based-scheduler.ll    | 555 +-----------------
 5 files changed, 46 insertions(+), 537 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVMachineScheduler.cpp b/llvm/lib/Target/RISCV/RISCVMachineScheduler.cpp
index e7035ce399bd3..32c95082ccf1e 100644
--- a/llvm/lib/Target/RISCV/RISCVMachineScheduler.cpp
+++ b/llvm/lib/Target/RISCV/RISCVMachineScheduler.cpp
@@ -18,6 +18,12 @@ static cl::opt<bool> EnableVTypeSchedHeuristic(
     cl::desc("Enable scheduling RVV instructions based on vtype heuristic "
              "(pick instruction with compatible vtype first)"));
 
+bool RISCVPreRAMachineSchedStrategy::enableVTypeSchedHeuristic() const {
+  if (EnableVTypeSchedHeuristic.getNumOccurrences() > 0)
+    return EnableVTypeSchedHeuristic;
+  return ST->enableVTypeSchedHeuristic();
+}
+
 RISCV::VSETVLIInfo
 RISCVPreRAMachineSchedStrategy::getVSETVLIInfo(const MachineInstr *MI) const {
   unsigned TSFlags = MI->getDesc().TSFlags;
@@ -177,7 +183,7 @@ bool RISCVPreRAMachineSchedStrategy::tryCandidate(SchedCandidate &Cand,
 
   // TODO: We should not use `CandReason::Cluster` here, but is there a
   // mechanism to extend this enum?
-  if (EnableVTypeSchedHeuristic &&
+  if (enableVTypeSchedHeuristic() &&
       tryVType(getVSETVLIInfo(TryCand.SU->getInstr()),
                getVSETVLIInfo(Cand.SU->getInstr()), TryCand, Cand, Cluster))
     return TryCand.Reason != NoCand;
@@ -197,7 +203,7 @@ void RISCVPreRAMachineSchedStrategy::leaveMBB() {
 
 void RISCVPreRAMachineSchedStrategy::schedNode(SUnit *SU, bool IsTopNode) {
   GenericScheduler::schedNode(SU, IsTopNode);
-  if (EnableVTypeSchedHeuristic) {
+  if (enableVTypeSchedHeuristic()) {
     MachineInstr *MI = SU->getInstr();
     const RISCV::VSETVLIInfo &Info = getVSETVLIInfo(MI);
     if (Info.isValid()) {
diff --git a/llvm/lib/Target/RISCV/RISCVMachineScheduler.h b/llvm/lib/Target/RISCV/RISCVMachineScheduler.h
index 4bd07871a0b97..a4a439f489c2b 100644
--- a/llvm/lib/Target/RISCV/RISCVMachineScheduler.h
+++ b/llvm/lib/Target/RISCV/RISCVMachineScheduler.h
@@ -21,11 +21,13 @@ namespace llvm {
 
 /// A GenericScheduler implementation for RISCV pre RA scheduling.
 class RISCVPreRAMachineSchedStrategy : public GenericScheduler {
-private:
+  const RISCVSubtarget *ST;
   RISCV::RISCVVSETVLIInfoAnalysis VIA;
   RISCV::VSETVLIInfo TopVType;
   RISCV::VSETVLIInfo BottomVType;
 
+  bool enableVTypeSchedHeuristic() const;
+
   RISCV::VSETVLIInfo getVSETVLIInfo(const MachineInstr *MI) const;
   bool tryVType(RISCV::VSETVLIInfo TryVType, RISCV::VSETVLIInfo CandVtype,
                 SchedCandidate &TryCand, SchedCandidate &Cand,
@@ -33,8 +35,8 @@ class RISCVPreRAMachineSchedStrategy : public GenericScheduler {
 
 public:
   RISCVPreRAMachineSchedStrategy(const MachineSchedContext *C)
-      : GenericScheduler(C),
-        VIA(&C->MF->getSubtarget<RISCVSubtarget>(), C->LIS) {}
+      : GenericScheduler(C), ST(&C->MF->getSubtarget<RISCVSubtarget>()),
+        VIA(ST, C->LIS) {}
 
 protected:
   bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand,
diff --git a/llvm/lib/Target/RISCV/RISCVProcessors.td b/llvm/lib/Target/RISCV/RISCVProcessors.td
index 5becfd2ad502b..7c151fcebcbdf 100644
--- a/llvm/lib/Target/RISCV/RISCVProcessors.td
+++ b/llvm/lib/Target/RISCV/RISCVProcessors.td
@@ -45,6 +45,8 @@ class RISCVTuneInfo {
 
   // The direction of PostRA scheduling.
   code PostRASchedDirection = TopDown;
+
+  bit EnableVTypeSchedHeuristic = 0;
 }
 
 def RISCVTuneInfoTable : GenericTable {
@@ -58,7 +60,7 @@ def RISCVTuneInfoTable : GenericTable {
                 "MaxStoresPerMemcpyOptSize", "MaxStoresPerMemcpy",
                 "MaxStoresPerMemmoveOptSize", "MaxStoresPerMemmove",
                 "MaxLoadsPerMemcmpOptSize", "MaxLoadsPerMemcmp",
-                "PostRASchedDirection"];
+                "PostRASchedDirection", "EnableVTypeSchedHeuristic"];
 }
 
 def getRISCVTuneInfo : SearchIndex {
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h
index ae6ca970c0c49..c4f0705d6864e 100644
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.h
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h
@@ -70,6 +70,8 @@ struct RISCVTuneInfo {
 
   // The direction of PostRA scheduling.
   MISched::Direction PostRASchedDirection;
+
+  bool EnableVTypeSchedHeuristic;
 };
 
 #define GET_RISCVTuneInfoTable_DECL
@@ -433,6 +435,10 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo {
     return TuneInfo->PostRASchedDirection;
   }
 
+  bool enableVTypeSchedHeuristic() const {
+    return TuneInfo->EnableVTypeSchedHeuristic;
+  }
+
   void overrideSchedPolicy(MachineSchedPolicy &Policy,
                            const SchedRegion &Region) const override;
 
diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-vtype-based-scheduler.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-vtype-based-scheduler.ll
index 8c48937e573ea..960906bae316d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/rvv-vtype-based-scheduler.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rvv-vtype-based-scheduler.ll
@@ -1,4 +1,3 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc -mtriple=riscv64 -mcpu=spacemit-x60 -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s --check-prefix=DEFAULT
 ; RUN: llc -mtriple=riscv64 -mcpu=spacemit-x60 -misched-prera-direction=bottomup \
@@ -11,388 +10,18 @@
 ; RUN:   -riscv-enable-vtype-sched-heuristic -verify-machineinstrs < %s \
 ; RUN:   | FileCheck %s --check-prefix=VTYPE-SCHED-BIDIRECTIONAL
 
-define void @test(i16 %0, i16 %1, i16 %2, i16 %3, i16 %4, i16 %5, i16 %6, ptr %7, ptr %8, ptr %9, ptr %10, ptr %11, i32 %12) {
-; DEFAULT-LABEL: test:
-; DEFAULT:       # %bb.0: # %entry
-; DEFAULT-NEXT:    ld a6, 0(sp)
-; DEFAULT-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; DEFAULT-NEXT:    vle8.v v9, (a7)
-; DEFAULT-NEXT:    ld a7, 8(sp)
-; DEFAULT-NEXT:    vle8.v v8, (a6)
-; DEFAULT-NEXT:    vsetivli zero, 8, e8, m1, ta, ma
-; DEFAULT-NEXT:    vslidedown.vi v10, v9, 1
-; DEFAULT-NEXT:    ld a6, 16(sp)
-; DEFAULT-NEXT:    vslidedown.vi v11, v8, 1
-; DEFAULT-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; DEFAULT-NEXT:    vle8.v v12, (a7)
-; DEFAULT-NEXT:    vsetivli zero, 8, e8, m1, ta, ma
-; DEFAULT-NEXT:    vslidedown.vi v13, v12, 1
-; DEFAULT-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; DEFAULT-NEXT:    vle8.v v14, (a6)
-; DEFAULT-NEXT:    vsetivli zero, 8, e8, m1, ta, ma
-; DEFAULT-NEXT:    vslidedown.vi v15, v14, 1
-; DEFAULT-NEXT:    vslidedown.vi v16, v9, 2
-; DEFAULT-NEXT:    vslidedown.vi v17, v8, 2
-; DEFAULT-NEXT:    vslidedown.vi v18, v12, 2
-; DEFAULT-NEXT:    vslidedown.vi v19, v14, 2
-; DEFAULT-NEXT:    vslidedown.vi v20, v9, 3
-; DEFAULT-NEXT:    vslidedown.vi v21, v8, 3
-; DEFAULT-NEXT:    vslidedown.vi v22, v12, 3
-; DEFAULT-NEXT:    vslidedown.vi v23, v14, 3
-; DEFAULT-NEXT:    vslidedown.vi v24, v9, 4
-; DEFAULT-NEXT:    vslidedown.vi v25, v8, 4
-; DEFAULT-NEXT:    vslidedown.vi v26, v12, 4
-; DEFAULT-NEXT:    vslidedown.vi v27, v14, 4
-; DEFAULT-NEXT:    vslidedown.vi v28, v9, 5
-; DEFAULT-NEXT:    vslidedown.vi v29, v8, 5
-; DEFAULT-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; DEFAULT-NEXT:    vmv.v.i v30, 0
-; DEFAULT-NEXT:    vsetivli zero, 8, e8, m1, ta, ma
-; DEFAULT-NEXT:    vslidedown.vi v31, v12, 5
-; DEFAULT-NEXT:    vmv1r.v v7, v30
-; DEFAULT-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; DEFAULT-NEXT:    vwmaccsu.vx v7, a0, v9
-; DEFAULT-NEXT:    vsetivli zero, 8, e8, m1, ta, ma
-; DEFAULT-NEXT:    vslidedown.vi v9, v14, 5
-; DEFAULT-NEXT:    vmv1r.v v6, v30
-; DEFAULT-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; DEFAULT-NEXT:    vwmaccsu.vx v6, a0, v8
-; DEFAULT-NEXT:    vwmaccsu.vx v7, a1, v10
-; DEFAULT-NEXT:    vmv1r.v v8, v30
-; DEFAULT-NEXT:    vwmaccsu.vx v8, a0, v12
-; DEFAULT-NEXT:    vwmaccsu.vx v6, a1, v11
-; DEFAULT-NEXT:    vwmaccsu.vx v7, a2, v16
-; DEFAULT-NEXT:    vwmaccsu.vx v30, a0, v14
-; DEFAULT-NEXT:    vwmaccsu.vx v8, a1, v13
-; DEFAULT-NEXT:    vwmaccsu.vx v6, a2, v17
-; DEFAULT-NEXT:    vwmaccsu.vx v7, a3, v20
-; DEFAULT-NEXT:    vwmaccsu.vx v30, a1, v15
-; DEFAULT-NEXT:    vwmaccsu.vx v8, a2, v18
-; DEFAULT-NEXT:    vwmaccsu.vx v6, a3, v21
-; DEFAULT-NEXT:    vwmaccsu.vx v7, a4, v24
-; DEFAULT-NEXT:    vwmaccsu.vx v30, a2, v19
-; DEFAULT-NEXT:    vwmaccsu.vx v8, a3, v22
-; DEFAULT-NEXT:    vwmaccsu.vx v6, a4, v25
-; DEFAULT-NEXT:    vwmaccsu.vx v7, a5, v28
-; DEFAULT-NEXT:    vwmaccsu.vx v30, a3, v23
-; DEFAULT-NEXT:    vwmaccsu.vx v8, a4, v26
-; DEFAULT-NEXT:    vwmaccsu.vx v6, a5, v29
-; DEFAULT-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; DEFAULT-NEXT:    vmax.vx v10, v7, zero
-; DEFAULT-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
-; DEFAULT-NEXT:    vwmaccsu.vx v30, a4, v27
-; DEFAULT-NEXT:    vwmaccsu.vx v8, a5, v31
-; DEFAULT-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; DEFAULT-NEXT:    vmax.vx v11, v6, zero
-; DEFAULT-NEXT:    csrwi vxrm, 0
-; DEFAULT-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
-; DEFAULT-NEXT:    vnclipu.wi v10, v10, 6
-; DEFAULT-NEXT:    vwmaccsu.vx v30, a5, v9
-; DEFAULT-NEXT:    ld a0, 24(sp)
-; DEFAULT-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; DEFAULT-NEXT:    vmax.vx v8, v8, zero
-; DEFAULT-NEXT:    lw a1, 32(sp)
-; DEFAULT-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
-; DEFAULT-NEXT:    vnclipu.wi v9, v11, 6
-; DEFAULT-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; DEFAULT-NEXT:    vmax.vx v11, v30, zero
-; DEFAULT-NEXT:    vse8.v v10, (a0)
-; DEFAULT-NEXT:    add a2, a0, a1
-; DEFAULT-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
-; DEFAULT-NEXT:    vnclipu.wi v8, v8, 6
-; DEFAULT-NEXT:    vse8.v v9, (a2)
-; DEFAULT-NEXT:    sh1add a2, a1, a0
-; DEFAULT-NEXT:    vnclipu.wi v9, v11, 6
-; DEFAULT-NEXT:    sh1add a1, a1, a1
-; DEFAULT-NEXT:    vse8.v v8, (a2)
-; DEFAULT-NEXT:    add a0, a0, a1
-; DEFAULT-NEXT:    vse8.v v9, (a0)
-; DEFAULT-NEXT:    ret
-;
-; VTYPE-SCHED-BOTTOMUP-LABEL: test:
-; VTYPE-SCHED-BOTTOMUP:       # %bb.0: # %entry
-; VTYPE-SCHED-BOTTOMUP-NEXT:    ld a6, 0(sp)
-; VTYPE-SCHED-BOTTOMUP-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; VTYPE-SCHED-BOTTOMUP-NEXT:    vle8.v v9, (a7)
-; VTYPE-SCHED-BOTTOMUP-NEXT:    ld a7, 8(sp)
-; VTYPE-SCHED-BOTTOMUP-NEXT:    vle8.v v8, (a6)
-; VTYPE-SCHED-BOTTOMUP-NEXT:    vsetivli zero, 8, e8, m1, ta, ma
-; VTYPE-SCHED-BOTTOMUP-NEXT:    vslidedown.vi v10, v9, 1
-; VTYPE-SCHED-BOTTOMUP-NEXT:    ld a6, 16(sp)
-; VTYPE-SCHED-BOTTOMUP-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; VTYPE-SCHED-BOTTOMUP-NEXT:    vle8.v v11, (a7)
-; VTYPE-SCHED-BOTTOMUP-NEXT:    vsetivli zero, 8, e8, m1, ta, ma
-; VTYPE-SCHED-BOTTOMUP-NEXT:    vslidedown.vi v12, v8, 1
-; VTYPE-SCHED-BOTTOMUP-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; VTYPE-SCHED-BOTTOMUP-NEXT:    vle8.v v13, (a6)
-; VTYPE-SCHED-BOTTOMUP-NEXT:    vsetivli zero, 8, e8, m1, ta, ma
-; VTYPE-SCHED-BOTTOMUP-NEXT:    vslidedown.vi v14, v11, 1
-; VTYPE-SCHED-BOTTOMUP-NEXT:    vslidedown.vi v15, v13, 1
-; VTYPE-SCHED-BOTTOMUP-NEXT:    vslidedown.vi v16, v9, 2
-; VTYPE-SCHED-BOTTOMUP-NEXT:    vslidedown.vi v17, v8, 2
-; VTYPE-SCHED-BOTTOMUP-NEXT:    vslidedown.vi v18, v11, 2
-; VTYPE-SCHED-BOTTOMUP-NEXT:    vslidedown.vi v19, v13, 2
-; VTYPE-SCHED-BOTTOMUP-NEXT:    vslidedown.vi v20, v9, 3
-; VTYPE-SCHED-BOTTOMUP-NEXT:    vslidedown.vi v21, v8, 3
-; VTYPE-SCHED-BOTTOMUP-NEXT:    vslidedown.vi v22, v11, 3
-; VTYPE-SCHED-BOTTOMUP-NEXT:    vslidedown.vi v23, v13, 3
-; VTYPE-SCHED-BOTTOMUP-NEXT:    vslidedown.vi v24, v9, 4
-; VTYPE-SCHED-BOTTOMUP-NEXT:    vslidedown.vi v25, v8, 4
-; VTYPE-SCHED-BOTTOMUP-NEXT:    vslidedown.vi v26, v11, 4
-; VTYPE-SCHED-BOTTOMUP-NEXT:    vslidedown.vi v27, v13, 4
-; VTYPE-SCHED-BOTTOMUP-NEXT:    vslidedown.vi v28, v9, 5
-; VTYPE-SCHED-BOTTOMUP-NEXT:    vslidedown.vi v29, v8, 5
-; VTYPE-SCHED-BOTTOMUP-NEXT:    vslidedown.vi v30, v11, 5
-; VTYPE-SCHED-BOTTOMUP-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; VTYPE-SCHED-BOTTOMUP-NEXT:    vmv.v.i v31, 0
-; VTYPE-SCHED-BOTTOMUP-NEXT:    vsetivli zero, 8, e8, m1, ta, ma
-; VTYPE-SCHED-BOTTOMUP-NEXT:    vslidedown.vi v7, v13, 5
-; VTYPE-SCHED-BOTTOMUP-NEXT:    vmv1r.v v6, v31
-; VTYPE-SCHED-BOTTOMUP-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; VTYPE-SCHED-BOTTOMUP-NEXT:    vwmaccsu.vx v6, a0, v9
-; VTYPE-SCHED-BOTTOMUP-NEXT:    vmv1r.v v9, v31
-; VTYPE-SCHED-BOTTOMUP-NEXT:    vwmaccsu.vx v9, a0, v8
-; VTYPE-SCHED-BOTTOMUP-NEXT:    vwmaccsu.vx v6, a1, v10
-; VTYPE-SCHED-BOTTOMUP-NEXT:    vmv1r.v v8, v31
-; VTYPE-SCHED-BOTTOMUP-NEXT:    vwmaccsu.vx v8, a0, v11
-; VTYPE-SCHED-BOTTOMUP-NEXT:    vwmaccsu.vx v31, a0, v13
-; VTYPE-SCHED-BOTTOMUP-NEXT:    vwmaccsu.vx v9, a1, v12
-; VTYPE-SCHED-BOTTOMUP-NEXT:    vwmaccsu.vx v6, a2, v16
-; VTYPE-SCHED-BOTTOMUP-NEXT:    vwmaccsu.vx v8, a1, v14
-; VTYPE-SCHED-BOTTOMUP-NEXT:    vwmaccsu.vx v31, a1, v15
-; VTYPE-SCHED-BOTTOMUP-NEXT:    vwmaccsu.vx v9, a2, v17
-; VTYPE-SCHED-BOTTOMUP-NEXT:    vwmaccsu.vx v6, a3, v20
-; VTYPE-SCHED-BOTTOMUP-NEXT:    vwmaccsu.vx v8, a2, v18
-; VTYPE-SCHED-BOTTOMUP-NEXT:    vwmaccsu.vx v31, a2, v19
-; VTYPE-SCHED-BOTTOMUP-NEXT:    vwmaccsu.vx v9, a3, v21
-; VTYPE-SCHED-BOTTOMUP-NEXT:    vwmaccsu.vx v6, a4, v24
-; VTYPE-SCHED-BOTTOMUP-NEXT:    vwmaccsu.vx v8, a3, v22
-; VTYPE-SCHED-BOTTOMUP-NEXT:    vwmaccsu.vx v31, a3, v23
-; VTYPE-SCHED-BOTTOMUP-NEXT:    vwmaccsu.vx v9, a4, v25
-; VTYPE-SCHED-BOTTOMUP-NEXT:    vwmaccsu.vx v6, a5, v28
-; VTYPE-SCHED-BOTTOMUP-NEXT:    vwmaccsu.vx v8, a4, v26
-; VTYPE-SCHED-BOTTOMUP-NEXT:    vwmaccsu.vx v31, a4, v27
-; VTYPE-SCHED-BOTTOMUP-NEXT:    vwmaccsu.vx v9, a5, v29
-; VTYPE-SCHED-BOTTOMUP-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; VTYPE-SCHED-BOTTOMUP-NEXT:    vmax.vx v10, v6, zero
-; VTYPE-SCHED-BOTTOMUP-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
-; VTYPE-SCHED-BOTTOMUP-NEXT:    vwmaccsu.vx v8, a5, v30
-; VTYPE-SCHED-BOTTOMUP-NEXT:    vwmaccsu.vx v31, a5, v7
-; VTYPE-SCHED-BOTTOMUP-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; VTYPE-SCHED-BOTTOMUP-NEXT:    vmax.vx v9, v9, zero
-; VTYPE-SCHED-BOTTOMUP-NEXT:    csrwi vxrm, 0
-; VTYPE-SCHED-BOTTOMUP-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
-; VTYPE-SCHED-BOTTOMUP-NEXT:    vnclipu.wi v10, v10, 6
-; VTYPE-SCHED-BOTTOMUP-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; VTYPE-SCHED-BOTTOMUP-NEXT:    vmax.vx v8, v8, zero
-; VTYPE-SCHED-BOTTOMUP-NEXT:    ld a0, 24(sp)
-; VTYPE-SCHED-BOTTOMUP-NEXT:    vmax.vx v11, v31, zero
-; VTYPE-SCHED-BOTTOMUP-NEXT:    lw a1, 32(sp)
-; VTYPE-SCHED-BOTTOMUP-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
-; VTYPE-SCHED-BOTTOMUP-NEXT:    vnclipu.wi v9, v9, 6
-; VTYPE-SCHED-BOTTOMUP-NEXT:    vnclipu.wi v8, v8, 6
-; VTYPE-SCHED-BOTTOMUP-NEXT:    vse8.v v10, (a0)
-; VTYPE-SCHED-BOTTOMUP-NEXT:    vnclipu.wi v10, v11, 6
-; VTYPE-SCHED-BOTTOMUP-NEXT:    add a2, a0, a1
-; VTYPE-SCHED-BOTTOMUP-NEXT:    vse8.v v9, (a2)
-; VTYPE-SCHED-BOTTOMUP-NEXT:    sh1add a2, a1, a0
-; VTYPE-SCHED-BOTTOMUP-NEXT:    sh1add a1, a1, a1
-; VTYPE-SCHED-BOTTOMUP-NEXT:    vse8.v v8, (a2)
-; VTYPE-SCHED-BOTTOMUP-NEXT:    add a0, a0, a1
-; VTYPE-SCHED-BOTTOMUP-NEXT:    vse8.v v10, (a0)
-; VTYPE-SCHED-BOTTOMUP-NEXT:    ret
-;
-; VTYPE-SCHED-TOPDOWN-LABEL: test:
-; VTYPE-SCHED-TOPDOWN:       # %bb.0: # %entry
-; VTYPE-SCHED-TOPDOWN-NEXT:    lw t1, 32(sp)
-; VTYPE-SCHED-TOPDOWN-NEXT:    ld a6, 24(sp)
-; VTYPE-SCHED-TOPDOWN-NEXT:    ld t2, 16(sp)
-; VTYPE-SCHED-TOPDOWN-NEXT:    ld t3, 8(sp)
-; VTYPE-SCHED-TOPDOWN-NEXT:    ld t4, 0(sp)
-; VTYPE-SCHED-TOPDOWN-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; VTYPE-SCHED-TOPDOWN-NEXT:    vle8.v v12, (a7)
-; VTYPE-SCHED-TOPDOWN-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; VTYPE-SCHED-TOPDOWN-NEXT:    vmv.v.i v8, 0
-; VTYPE-SCHED-TOPDOWN-NEXT:    csrwi vxrm, 0
-; VTYPE-SCHED-TOPDOWN-NEXT:    sh1add t5, t1, t1
-; VTYPE-SCHED-TOPDOWN-NEXT:    add t0, a6, t1
-; VTYPE-SCHED-TOPDOWN-NEXT:    sh1add a7, t1, a6
-; VTYPE-SCHED-TOPDOWN-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; VTYPE-SCHED-TOPDOWN-NEXT:    vle8.v v13, (t4)
-; VTYPE-SCHED-TOPDOWN-NEXT:    vsetivli zero, 8, e8, m1, ta, ma
-; VTYPE-SCHED-TOPDOWN-NEXT:    vslidedown.vi v16, v12, 1
-; VTYPE-SCHED-TOPDOWN-NEXT:    vmv1r.v v9, v8
-; VTYPE-SCHED-TOPDOWN-NEXT:    vmv1r.v v10, v8
-; VTYPE-SCHED-TOPDOWN-NEXT:    vmv1r.v v11, v8
-; VTYPE-SCHED-TOPDOWN-NEXT:    add t1, a6, t5
-; VTYPE-SCHED-TOPDOWN-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; VTYPE-SCHED-TOPDOWN-NEXT:    vle8.v v14, (t2)
-; VTYPE-SCHED-TOPDOWN-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; VTYPE-SCHED-TOPDOWN-NEXT:    vwmaccsu.vx v9, a0, v12
-; VTYPE-SCHED-TOPDOWN-NEXT:    vwmaccsu.vx v10, a0, v13
-; VTYPE-SCHED-TOPDOWN-NEXT:    vsetivli zero, 8, e8, m1, ta, ma
-; VTYPE-SCHED-TOPDOWN-NEXT:    vslidedown.vi v17, v13, 1
-; VTYPE-SCHED-TOPDOWN-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; VTYPE-SCHED-TOPDOWN-NEXT:    vle8.v v15, (t3)
-; VTYPE-SCHED-TOPDOWN-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; VTYPE-SCHED-TOPDOWN-NEXT:    vwmaccsu.vx v8, a0, v14
-; VTYPE-SCHED-TOPDOWN-NEXT:    vwmaccsu.vx v11, a0, v15
-; VTYPE-SCHED-TOPDOWN-NEXT:    vwmaccsu.vx v9, a1, v16
-; VTYPE-SCHED-TOPDOWN-NEXT:    vwmaccsu.vx v10, a1, v17
-; VTYPE-SCHED-TOPDOWN-NEXT:    vsetivli zero, 8, e8, m1, ta, ma
-; VTYPE-SCHED-TOPDOWN-NEXT:    vslidedown.vi v16, v15, 1
-; VTYPE-SCHED-TOPDOWN-NEXT:    vslidedown.vi v17, v14, 1
-; VTYPE-SCHED-TOPDOWN-NEXT:    vslidedown.vi v18, v12, 2
-; VTYPE-SCHED-TOPDOWN-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; VTYPE-SCHED-TOPDOWN-NEXT:    vwmaccsu.vx v11, a1, v16
-; VTYPE-SCHED-TOPDOWN-NEXT:    vwmaccsu.vx v8, a1, v17
-; VTYPE-SCHED-TOPDOWN-NEXT:    vsetivli zero, 8, e8, m1, ta, ma
-; VTYPE-SCHED-TOPDOWN-NEXT:    vslidedown.vi v16, v13, 2
-; VTYPE-SCHED-TOPDOWN-NEXT:    vslidedown.vi v17, v15, 2
-; VTYPE-SCHED-TOPDOWN-NEXT:    vslidedown.vi v19, v14, 2
-; VTYPE-SCHED-TOPDOWN-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; VTYPE-SCHED-TOPDOWN-NEXT:    vwmaccsu.vx v9, a2, v18
-; VTYPE-SCHED-TOPDOWN-NEXT:    vwmaccsu.vx v10, a2, v16
-; VTYPE-SCHED-TOPDOWN-NEXT:    vwmaccsu.vx v11, a2, v17
-; VTYPE-SCHED-TOPDOWN-NEXT:    vwmaccsu.vx v8, a2, v19
-; VTYPE-SCHED-TOPDOWN-NEXT:    vsetivli zero, 8, e8, m1, ta, ma
-; VTYPE-SCHED-TOPDOWN-NEXT:    vslidedown.vi v16, v12, 3
-; VTYPE-SCHED-TOPDOWN-NEXT:    vslidedown.vi v17, v13, 3
-; VTYPE-SCHED-TOPDOWN-NEXT:    vslidedown.vi v18, v15, 3
-; VTYPE-SCHED-TOPDOWN-NEXT:    vslidedown.vi v19, v14, 3
-; VTYPE-SCHED-TOPDOWN-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; VTYPE-SCHED-TOPDOWN-NEXT:    vwmaccsu.vx v9, a3, v16
-; VTYPE-SCHED-TOPDOWN-NEXT:    vwmaccsu.vx v10, a3, v17
-; VTYPE-SCHED-TOPDOWN-NEXT:    vwmaccsu.vx v11, a3, v18
-; VTYPE-SCHED-TOPDOWN-NEXT:    vwmaccsu.vx v8, a3, v19
-; VTYPE-SCHED-TOPDOWN-NEXT:    vsetivli zero, 8, e8, m1, ta, ma
-; VTYPE-SCHED-TOPDOWN-NEXT:    vslidedown.vi v16, v12, 4
-; VTYPE-SCHED-TOPDOWN-NEXT:    vslidedown.vi v17, v13, 4
-; VTYPE-SCHED-TOPDOWN-NEXT:    vslidedown.vi v18, v15, 4
-; VTYPE-SCHED-TOPDOWN-NEXT:    vslidedown.vi v19, v14, 4
-; VTYPE-SCHED-TOPDOWN-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; VTYPE-SCHED-TOPDOWN-NEXT:    vwmaccsu.vx v10, a4, v17
-; VTYPE-SCHED-TOPDOWN-NEXT:    vwmaccsu.vx v9, a4, v16
-; VTYPE-SCHED-TOPDOWN-NEXT:    vwmaccsu.vx v11, a4, v18
-; VTYPE-SCHED-TOPDOWN-NEXT:    vwmaccsu.vx v8, a4, v19
-; VTYPE-SCHED-TOPDOWN-NEXT:    vsetivli zero, 8, e8, m1, ta, ma
-; VTYPE-SCHED-TOPDOWN-NEXT:    vslidedown.vi v12, v12, 5
-; VTYPE-SCHED-TOPDOWN-NEXT:    vslidedown.vi v13, v13, 5
-; VTYPE-SCHED-TOPDOWN-NEXT:    vslidedown.vi v15, v15, 5
-; VTYPE-SCHED-TOPDOWN-NEXT:    vslidedown.vi v14, v14, 5
-; VTYPE-SCHED-TOPDOWN-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; VTYPE-SCHED-TOPDOWN-NEXT:    vwmaccsu.vx v9, a5, v12
-; VTYPE-SCHED-TOPDOWN-NEXT:    vwmaccsu.vx v10, a5, v13
-; VTYPE-SCHED-TOPDOWN-NEXT:    vwmaccsu.vx v11, a5, v15
-; VTYPE-SCHED-TOPDOWN-NEXT:    vwmaccsu.vx v8, a5, v14
-; VTYPE-SCHED-TOPDOWN-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; VTYPE-SCHED-TOPDOWN-NEXT:    vmax.vx v9, v9, zero
-; VTYPE-SCHED-TOPDOWN-NEXT:    vmax.vx v10, v10, zero
-; VTYPE-SCHED-TOPDOWN-NEXT:    vmax.vx v11, v11, zero
-; VTYPE-SCHED-TOPDOWN-NEXT:    vmax.vx v8, v8, zero
-; VTYPE-SCHED-TOPDOWN-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
-; VTYPE-SCHED-TOPDOWN-NEXT:    vnclipu.wi v9, v9, 6
-; VTYPE-SCHED-TOPDOWN-NEXT:    vnclipu.wi v10, v10, 6
-; VTYPE-SCHED-TOPDOWN-NEXT:    vnclipu.wi v11, v11, 6
-; VTYPE-SCHED-TOPDOWN-NEXT:    vse8.v v9, (a6)
-; VTYPE-SCHED-TOPDOWN-NEXT:    vnclipu.wi v8, v8, 6
-; VTYPE-SCHED-TOPDOWN-NEXT:    vse8.v v10, (t0)
-; VTYPE-SCHED-TOPDOWN-NEXT:    vse8.v v11, (a7)
-; VTYPE-SCHED-TOPDOWN-NEXT:    vse8.v v8, (t1)
-; VTYPE-SCHED-TOPDOWN-NEXT:    ret
-;
-; VTYPE-SCHED-BIDIRECTIONAL-LABEL: test:
-; VTYPE-SCHED-BIDIRECTIONAL:       # %bb.0: # %entry
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    ld a6, 0(sp)
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    vle8.v v8, (a7)
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    ld a7, 8(sp)
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    vle8.v v9, (a6)
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    vsetivli zero, 8, e8, m1, ta, ma
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    vslidedown.vi v10, v8, 1
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    ld a6, 16(sp)
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    vslidedown.vi v11, v9, 1
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    vle8.v v12, (a7)
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    vsetivli zero, 8, e8, m1, ta, ma
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    vslidedown.vi v13, v12, 1
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    vle8.v v14, (a6)
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    vsetivli zero, 8, e8, m1, ta, ma
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    vslidedown.vi v15, v14, 1
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    vslidedown.vi v16, v8, 2
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    vslidedown.vi v17, v9, 2
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    vslidedown.vi v18, v12, 2
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    vslidedown.vi v19, v14, 2
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    vslidedown.vi v20, v8, 3
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    vslidedown.vi v21, v9, 3
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    vslidedown.vi v22, v12, 3
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    vslidedown.vi v23, v14, 3
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    vslidedown.vi v24, v8, 4
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    vslidedown.vi v25, v9, 4
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    vslidedown.vi v26, v12, 4
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    vslidedown.vi v27, v14, 4
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    vslidedown.vi v28, v8, 5
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    vslidedown.vi v29, v9, 5
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    vslidedown.vi v30, v12, 5
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    vmv.v.i v31, 0
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    vsetivli zero, 8, e8, m1, ta, ma
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    vslidedown.vi v7, v14, 5
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    vmv1r.v v6, v31
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    vwmaccsu.vx v6, a0, v8
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    vmv1r.v v8, v31
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    vwmaccsu.vx v8, a0, v9
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    vwmaccsu.vx v6, a1, v10
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    vmv1r.v v9, v31
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    vwmaccsu.vx v9, a0, v12
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    vwmaccsu.vx v31, a0, v14
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    vwmaccsu.vx v8, a1, v11
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    vwmaccsu.vx v6, a2, v16
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    vwmaccsu.vx v9, a1, v13
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    vwmaccsu.vx v31, a1, v15
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    vwmaccsu.vx v8, a2, v17
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    vwmaccsu.vx v6, a3, v20
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    vwmaccsu.vx v9, a2, v18
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    vwmaccsu.vx v31, a2, v19
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    vwmaccsu.vx v8, a3, v21
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    vwmaccsu.vx v6, a4, v24
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    vwmaccsu.vx v9, a3, v22
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    vwmaccsu.vx v31, a3, v23
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    vwmaccsu.vx v8, a4, v25
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    vwmaccsu.vx v6, a5, v28
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    vwmaccsu.vx v9, a4, v26
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    vwmaccsu.vx v31, a4, v27
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    vwmaccsu.vx v8, a5, v29
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    vmax.vx v10, v6, zero
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    vwmaccsu.vx v9, a5, v30
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    vwmaccsu.vx v31, a5, v7
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    vmax.vx v8, v8, zero
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    csrwi vxrm, 0
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    vnclipu.wi v10, v10, 6
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    ld a0, 24(sp)
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    vmax.vx v9, v9, zero
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    lw a1, 32(sp)
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    vmax.vx v11, v31, zero
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    vnclipu.wi v8, v8, 6
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    vse8.v v10, (a0)
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    vnclipu.wi v9, v9, 6
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    add a2, a0, a1
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    sh1add a3, a1, a0
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    vse8.v v8, (a2)
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    vnclipu.wi v8, v11, 6
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    sh1add a1, a1, a1
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    vse8.v v9, (a3)
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    add a0, a0, a1
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    vse8.v v8, (a0)
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    ret
+define void @test0(i16 %0, i16 %1, i16 %2, i16 %3, i16 %4, i16 %5, i16 %6, ptr %7, ptr %8, ptr %9, ptr %10, ptr %11, i32 %12) {
+; DEFAULT-LABEL: test0:
+; DEFAULT-COUNT-19: vset
+
+; VTYPE-SCHED-BOTTOMUP-LABEL: test0:
+; VTYPE-SCHED-BOTTOMUP-COUNT-15: vset
+
+; VTYPE-SCHED-TOPDOWN-LABEL: test0:
+; VTYPE-SCHED-TOPDOWN-COUNT-19: vset
+
+; VTYPE-SCHED-BIDIRECTIONAL-LABEL: test0:
+; VTYPE-SCHED-BIDIRECTIONAL-COUNT-15: vset
 entry:
   %14 = tail call <vscale x 8 x i8> @llvm.riscv.vle.nxv8i8.p0.i64(<vscale x 8 x i8> poison, ptr %7, i64 16)
   %15 = tail call <vscale x 8 x i8> @llvm.riscv.vle.nxv8i8.p0.i64(<vscale x 8 x i8> poison, ptr %8, i64 16)
@@ -496,154 +125,18 @@ entry:
   ret void
 }
 
-define void @foo(ptr %0, ptr %1, ptr %2, ptr %3, ptr %4) {
-; DEFAULT-LABEL: foo:
-; DEFAULT:       # %bb.0: # %entry
-; DEFAULT-NEXT:    vsetivli zero, 8, e64, m2, ta, ma
-; DEFAULT-NEXT:    vle64.v v8, (a1)
-; DEFAULT-NEXT:    vle64.v v10, (a2)
-; DEFAULT-NEXT:    vmsltu.vv v12, v8, v10
-; DEFAULT-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
-; DEFAULT-NEXT:    vmv.x.s a1, v12
-; DEFAULT-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; DEFAULT-NEXT:    vmseq.vv v14, v8, v10
-; DEFAULT-NEXT:    vle64.v v12, (a3)
-; DEFAULT-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
-; DEFAULT-NEXT:    vmv.x.s a2, v14
-; DEFAULT-NEXT:    vle64.v v14, (a4)
-; DEFAULT-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; DEFAULT-NEXT:    vmsltu.vv v16, v12, v14
-; DEFAULT-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
-; DEFAULT-NEXT:    vmv.x.s a3, v16
-; DEFAULT-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; DEFAULT-NEXT:    vmseq.vv v16, v12, v14
-; DEFAULT-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
-; DEFAULT-NEXT:    vmv.x.s a4, v16
-; DEFAULT-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; DEFAULT-NEXT:    vsub.vv v8, v8, v10
-; DEFAULT-NEXT:    sh1add a1, a1, a2
-; DEFAULT-NEXT:    xor a1, a1, a2
-; DEFAULT-NEXT:    vmv.s.x v0, a1
-; DEFAULT-NEXT:    vmv.v.i v10, 0
-; DEFAULT-NEXT:    vmerge.vim v16, v10, 1, v0
-; DEFAULT-NEXT:    vsub.vv v8, v8, v16
-; DEFAULT-NEXT:    sh1add a1, a3, a4
-; DEFAULT-NEXT:    xor a1, a1, a4
-; DEFAULT-NEXT:    vmv.s.x v0, a1
-; DEFAULT-NEXT:    vsub.vv v12, v12, v14
-; DEFAULT-NEXT:    vmerge.vim v10, v10, 1, v0
-; DEFAULT-NEXT:    vsub.vv v10, v12, v10
-; DEFAULT-NEXT:    vand.vv v8, v10, v8
-; DEFAULT-NEXT:    vse64.v v8, (a0)
-; DEFAULT-NEXT:    ret
-;
-; VTYPE-SCHED-BOTTOMUP-LABEL: foo:
-; VTYPE-SCHED-BOTTOMUP:       # %bb.0: # %entry
-; VTYPE-SCHED-BOTTOMUP-NEXT:    vsetivli zero, 8, e64, m2, ta, ma
-; VTYPE-SCHED-BOTTOMUP-NEXT:    vle64.v v12, (a1)
-; VTYPE-SCHED-BOTTOMUP-NEXT:    vle64.v v14, (a2)
-; VTYPE-SCHED-BOTTOMUP-NEXT:    vle64.v v8, (a3)
-; VTYPE-SCHED-BOTTOMUP-NEXT:    vle64.v v10, (a4)
-; VTYPE-SCHED-BOTTOMUP-NEXT:    vmsltu.vv v16, v12, v14
-; VTYPE-SCHED-BOTTOMUP-NEXT:    vmsltu.vv v17, v8, v10
-; VTYPE-SCHED-BOTTOMUP-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
-; VTYPE-SCHED-BOTTOMUP-NEXT:    vmv.x.s a1, v16
-; VTYPE-SCHED-BOTTOMUP-NEXT:    vmv.x.s a2, v17
-; VTYPE-SCHED-BOTTOMUP-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; VTYPE-SCHED-BOTTOMUP-NEXT:    vmseq.vv v16, v12, v14
-; VTYPE-SCHED-BOTTOMUP-NEXT:    vmseq.vv v17, v8, v10
-; VTYPE-SCHED-BOTTOMUP-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
-; VTYPE-SCHED-BOTTOMUP-NEXT:    vmv.x.s a3, v16
-; VTYPE-SCHED-BOTTOMUP-NEXT:    vmv.x.s a4, v17
-; VTYPE-SCHED-BOTTOMUP-NEXT:    sh1add a1, a1, a3
-; VTYPE-SCHED-BOTTOMUP-NEXT:    xor a1, a1, a3
-; VTYPE-SCHED-BOTTOMUP-NEXT:    vmv.s.x v0, a1
-; VTYPE-SCHED-BOTTOMUP-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; VTYPE-SCHED-BOTTOMUP-NEXT:    vsub.vv v12, v12, v14
-; VTYPE-SCHED-BOTTOMUP-NEXT:    vmv.v.i v14, 0
-; VTYPE-SCHED-BOTTOMUP-NEXT:    vmerge.vim v16, v14, 1, v0
-; VTYPE-SCHED-BOTTOMUP-NEXT:    vsub.vv v12, v12, v16
-; VTYPE-SCHED-BOTTOMUP-NEXT:    sh1add a1, a2, a4
-; VTYPE-SCHED-BOTTOMUP-NEXT:    xor a1, a1, a4
-; VTYPE-SCHED-BOTTOMUP-NEXT:    vmv.s.x v0, a1
-; VTYPE-SCHED-BOTTOMUP-NEXT:    vsub.vv v8, v8, v10
-; VTYPE-SCHED-BOTTOMUP-NEXT:    vmerge.vim v10, v14, 1, v0
-; VTYPE-SCHED-BOTTOMUP-NEXT:    vsub.vv v8, v8, v10
-; VTYPE-SCHED-BOTTOMUP-NEXT:    vand.vv v8, v8, v12
-; VTYPE-SCHED-BOTTOMUP-NEXT:    vse64.v v8, (a0)
-; VTYPE-SCHED-BOTTOMUP-NEXT:    ret
-;
-; VTYPE-SCHED-TOPDOWN-LABEL: foo:
-; VTYPE-SCHED-TOPDOWN:       # %bb.0: # %entry
-; VTYPE-SCHED-TOPDOWN-NEXT:    vsetivli zero, 8, e64, m2, ta, ma
-; VTYPE-SCHED-TOPDOWN-NEXT:    vle64.v v10, (a1)
-; VTYPE-SCHED-TOPDOWN-NEXT:    vmv.v.i v8, 0
-; VTYPE-SCHED-TOPDOWN-NEXT:    vle64.v v12, (a2)
-; VTYPE-SCHED-TOPDOWN-NEXT:    vle64.v v14, (a3)
-; VTYPE-SCHED-TOPDOWN-NEXT:    vmsltu.vv v18, v10, v12
-; VTYPE-SCHED-TOPDOWN-NEXT:    vmseq.vv v19, v10, v12
-; VTYPE-SCHED-TOPDOWN-NEXT:    vle64.v v16, (a4)
-; VTYPE-SCHED-TOPDOWN-NEXT:    vsub.vv v10, v10, v12
-; VTYPE-SCHED-TOPDOWN-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
-; VTYPE-SCHED-TOPDOWN-NEXT:    vmv.x.s a1, v18
-; VTYPE-SCHED-TOPDOWN-NEXT:    vmv.x.s a2, v19
-; VTYPE-SCHED-TOPDOWN-NEXT:    sh1add a1, a1, a2
-; VTYPE-SCHED-TOPDOWN-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; VTYPE-SCHED-TOPDOWN-NEXT:    vmsltu.vv v18, v14, v16
-; VTYPE-SCHED-TOPDOWN-NEXT:    xor a1, a1, a2
-; VTYPE-SCHED-TOPDOWN-NEXT:    vsub.vv v12, v14, v16
-; VTYPE-SCHED-TOPDOWN-NEXT:    vmseq.vv v19, v14, v16
-; VTYPE-SCHED-TOPDOWN-NEXT:    vmv.s.x v0, a1
-; VTYPE-SCHED-TOPDOWN-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
-; VTYPE-SCHED-TOPDOWN-NEXT:    vmv.x.s a1, v18
-; VTYPE-SCHED-TOPDOWN-NEXT:    vmv.x.s a2, v19
-; VTYPE-SCHED-TOPDOWN-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; VTYPE-SCHED-TOPDOWN-NEXT:    vmerge.vim v14, v8, 1, v0
-; VTYPE-SCHED-TOPDOWN-NEXT:    sh1add a1, a1, a2
-; VTYPE-SCHED-TOPDOWN-NEXT:    xor a1, a1, a2
-; VTYPE-SCHED-TOPDOWN-NEXT:    vsub.vv v10, v10, v14
-; VTYPE-SCHED-TOPDOWN-NEXT:    vmv.s.x v0, a1
-; VTYPE-SCHED-TOPDOWN-NEXT:    vmerge.vim v8, v8, 1, v0
-; VTYPE-SCHED-TOPDOWN-NEXT:    vsub.vv v8, v12, v8
-; VTYPE-SCHED-TOPDOWN-NEXT:    vand.vv v8, v8, v10
-; VTYPE-SCHED-TOPDOWN-NEXT:    vse64.v v8, (a0)
-; VTYPE-SCHED-TOPDOWN-NEXT:    ret
-;
-; VTYPE-SCHED-BIDIRECTIONAL-LABEL: foo:
-; VTYPE-SCHED-BIDIRECTIONAL:       # %bb.0: # %entry
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    vsetivli zero, 8, e64, m2, ta, ma
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    vle64.v v12, (a1)
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    vle64.v v14, (a2)
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    vle64.v v8, (a3)
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    vmsltu.vv v16, v12, v14
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    vle64.v v10, (a4)
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    vmsltu.vv v17, v8, v10
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    vmv.x.s a1, v16
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    vmv.x.s a2, v17
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    vmseq.vv v16, v12, v14
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    vmseq.vv v17, v8, v10
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    vmv.x.s a3, v16
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    vmv.x.s a4, v17
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    sh1add a1, a1, a3
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    xor a1, a1, a3
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    vmv.s.x v0, a1
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    vsub.vv v12, v12, v14
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    vmv.v.i v14, 0
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    vmerge.vim v16, v14, 1, v0
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    vsub.vv v12, v12, v16
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    sh1add a1, a2, a4
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    xor a1, a1, a4
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    vmv.s.x v0, a1
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    vsub.vv v8, v8, v10
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    vmerge.vim v10, v14, 1, v0
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    vsub.vv v8, v8, v10
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    vand.vv v8, v8, v12
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    vse64.v v8, (a0)
-; VTYPE-SCHED-BIDIRECTIONAL-NEXT:    ret
+define void @test1(ptr %0, ptr %1, ptr %2, ptr %3, ptr %4) {
+; DEFAULT-LABEL: test1:
+; DEFAULT-COUNT-9: vset
+
+; VTYPE-SCHED-BOTTOMUP-LABEL: test1:
+; VTYPE-SCHED-BOTTOMUP-COUNT-5: vset
+
+; VTYPE-SCHED-TOPDOWN-LABEL: test1:
+; VTYPE-SCHED-TOPDOWN-COUNT-5: vset
+
+; VTYPE-SCHED-BIDIRECTIONAL-LABEL: test1:
+; VTYPE-SCHED-BIDIRECTIONAL-COUNT-5: vset
 entry:
   %5 = load <8 x i64>, ptr %1, align 64
   %6 = load <8 x i64>, ptr %2, align 64



More information about the llvm-branch-commits mailing list