[llvm] [RISCV][PoC] Schedule RVV instructions with same type first (PR #95924)
Pengcheng Wang via llvm-commits
llvm-commits at lists.llvm.org
Wed Jun 19 03:46:00 PDT 2024
https://github.com/wangpc-pp updated https://github.com/llvm/llvm-project/pull/95924
>From 5ac4ff3040f8a5a6cc68efffe3349ef9d181ddec Mon Sep 17 00:00:00 2001
From: Wang Pengcheng <wangpengcheng.pp at bytedance.com>
Date: Tue, 18 Jun 2024 21:33:25 +0800
Subject: [PATCH 1/2] [𝘀𝗽𝗿] initial version
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Created using spr 1.3.6-beta.1
---
llvm/include/llvm/CodeGen/MachineScheduler.h | 43 ++++++++--
llvm/lib/CodeGen/MachineScheduler.cpp | 34 +-------
llvm/lib/Target/RISCV/CMakeLists.txt | 1 +
.../Target/RISCV/RISCVMachineScheduler.cpp | 83 +++++++++++++++++++
llvm/lib/Target/RISCV/RISCVMachineScheduler.h | 42 ++++++++++
llvm/lib/Target/RISCV/RISCVTargetMachine.cpp | 8 +-
llvm/test/CodeGen/RISCV/rvv/schedule.ll | 49 +++++++++++
7 files changed, 215 insertions(+), 45 deletions(-)
create mode 100644 llvm/lib/Target/RISCV/RISCVMachineScheduler.cpp
create mode 100644 llvm/lib/Target/RISCV/RISCVMachineScheduler.h
create mode 100644 llvm/test/CodeGen/RISCV/rvv/schedule.ll
diff --git a/llvm/include/llvm/CodeGen/MachineScheduler.h b/llvm/include/llvm/CodeGen/MachineScheduler.h
index b15abf040058e..d1b5b83e5300b 100644
--- a/llvm/include/llvm/CodeGen/MachineScheduler.h
+++ b/llvm/include/llvm/CodeGen/MachineScheduler.h
@@ -1349,14 +1349,6 @@ class PostGenericScheduler : public GenericSchedulerBase {
void pickNodeFromQueue(SchedBoundary &Zone, SchedCandidate &Cand);
};
-/// Create the standard converging machine scheduler. This will be used as the
-/// default scheduler if the target does not set a default.
-/// Adds default DAG mutations.
-ScheduleDAGMILive *createGenericSchedLive(MachineSchedContext *C);
-
-/// Create a generic scheduler with no vreg liveness or DAG mutation passes.
-ScheduleDAGMI *createGenericSchedPostRA(MachineSchedContext *C);
-
/// If ReorderWhileClustering is set to true, no attempt will be made to
/// reduce reordering due to store clustering.
std::unique_ptr<ScheduleDAGMutation>
@@ -1375,6 +1367,41 @@ std::unique_ptr<ScheduleDAGMutation>
createCopyConstrainDAGMutation(const TargetInstrInfo *TII,
const TargetRegisterInfo *TRI);
+/// Create the standard converging machine scheduler. This will be used as the
+/// default scheduler if the target does not set a default.
+/// Adds default DAG mutations.
+template <typename Strategy = GenericScheduler>
+ScheduleDAGMILive *createGenericSchedLive(MachineSchedContext *C) {
+ ScheduleDAGMILive *DAG =
+ new ScheduleDAGMILive(C, std::make_unique<Strategy>(C));
+ // Register DAG post-processors.
+ //
+ // FIXME: extend the mutation API to allow earlier mutations to instantiate
+ // data and pass it to later mutations. Have a single mutation that gathers
+ // the interesting nodes in one pass.
+ DAG->addMutation(createCopyConstrainDAGMutation(DAG->TII, DAG->TRI));
+
+ const TargetSubtargetInfo &STI = C->MF->getSubtarget();
+ // Add MacroFusion mutation if fusions are not empty.
+ const auto &MacroFusions = STI.getMacroFusions();
+ if (!MacroFusions.empty())
+ DAG->addMutation(createMacroFusionDAGMutation(MacroFusions));
+ return DAG;
+}
+
+/// Create a generic scheduler with no vreg liveness or DAG mutation passes.
+template <typename Strategy = PostGenericScheduler>
+ScheduleDAGMI *createGenericSchedPostRA(MachineSchedContext *C) {
+ ScheduleDAGMI *DAG = new ScheduleDAGMI(C, std::make_unique<Strategy>(C),
+ /*RemoveKillFlags=*/true);
+ const TargetSubtargetInfo &STI = C->MF->getSubtarget();
+ // Add MacroFusion mutation if fusions are not empty.
+ const auto &MacroFusions = STI.getMacroFusions();
+ if (!MacroFusions.empty())
+ DAG->addMutation(createMacroFusionDAGMutation(MacroFusions));
+ return DAG;
+}
+
} // end namespace llvm
#endif // LLVM_CODEGEN_MACHINESCHEDULER_H
diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp
index cf72f74380835..ac792ad4d5484 100644
--- a/llvm/lib/CodeGen/MachineScheduler.cpp
+++ b/llvm/lib/CodeGen/MachineScheduler.cpp
@@ -2701,7 +2701,7 @@ void SchedBoundary::bumpNode(SUnit *SU) {
unsigned NextCycle = CurrCycle;
switch (SchedModel->getMicroOpBufferSize()) {
case 0:
- assert(ReadyCycle <= CurrCycle && "Broken PendingQueue");
+ // assert(ReadyCycle <= CurrCycle && "Broken PendingQueue");
break;
case 1:
if (ReadyCycle > NextCycle) {
@@ -3847,26 +3847,6 @@ void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
}
}
-/// Create the standard converging machine scheduler. This will be used as the
-/// default scheduler if the target does not set a default.
-ScheduleDAGMILive *llvm::createGenericSchedLive(MachineSchedContext *C) {
- ScheduleDAGMILive *DAG =
- new ScheduleDAGMILive(C, std::make_unique<GenericScheduler>(C));
- // Register DAG post-processors.
- //
- // FIXME: extend the mutation API to allow earlier mutations to instantiate
- // data and pass it to later mutations. Have a single mutation that gathers
- // the interesting nodes in one pass.
- DAG->addMutation(createCopyConstrainDAGMutation(DAG->TII, DAG->TRI));
-
- const TargetSubtargetInfo &STI = C->MF->getSubtarget();
- // Add MacroFusion mutation if fusions are not empty.
- const auto &MacroFusions = STI.getMacroFusions();
- if (!MacroFusions.empty())
- DAG->addMutation(createMacroFusionDAGMutation(MacroFusions));
- return DAG;
-}
-
static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C) {
return createGenericSchedLive(C);
}
@@ -4139,18 +4119,6 @@ void PostGenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
}
}
-ScheduleDAGMI *llvm::createGenericSchedPostRA(MachineSchedContext *C) {
- ScheduleDAGMI *DAG =
- new ScheduleDAGMI(C, std::make_unique<PostGenericScheduler>(C),
- /*RemoveKillFlags=*/true);
- const TargetSubtargetInfo &STI = C->MF->getSubtarget();
- // Add MacroFusion mutation if fusions are not empty.
- const auto &MacroFusions = STI.getMacroFusions();
- if (!MacroFusions.empty())
- DAG->addMutation(createMacroFusionDAGMutation(MacroFusions));
- return DAG;
-}
-
//===----------------------------------------------------------------------===//
// ILP Scheduler. Currently for experimental analysis of heuristics.
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/RISCV/CMakeLists.txt b/llvm/lib/Target/RISCV/CMakeLists.txt
index 8715403f3839a..fe3f213b253f7 100644
--- a/llvm/lib/Target/RISCV/CMakeLists.txt
+++ b/llvm/lib/Target/RISCV/CMakeLists.txt
@@ -44,6 +44,7 @@ add_llvm_target(RISCVCodeGen
RISCVISelDAGToDAG.cpp
RISCVISelLowering.cpp
RISCVMachineFunctionInfo.cpp
+ RISCVMachineScheduler.cpp
RISCVMergeBaseOffset.cpp
RISCVOptWInstrs.cpp
RISCVPostRAExpandPseudoInsts.cpp
diff --git a/llvm/lib/Target/RISCV/RISCVMachineScheduler.cpp b/llvm/lib/Target/RISCV/RISCVMachineScheduler.cpp
new file mode 100644
index 0000000000000..d993d840c3d3a
--- /dev/null
+++ b/llvm/lib/Target/RISCV/RISCVMachineScheduler.cpp
@@ -0,0 +1,83 @@
+//===- RISCVMachineScheduler.cpp - MI Scheduler for RISC-V ----------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "RISCVMachineScheduler.h"
+#include "MCTargetDesc/RISCVBaseInfo.h"
+#include "MCTargetDesc/RISCVMCTargetDesc.h"
+#include "RISCVInstrInfo.h"
+#include "RISCVSubtarget.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineScheduler.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/MC/MCInstrDesc.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/TargetParser/RISCVTargetParser.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "riscv-prera-sched-strategy"
+
+static cl::opt<bool> EnableScheduleSameVType(
+ "riscv-enable-schedule-same-vtype", cl::init(false), cl::Hidden,
+ cl::desc("Enable scheduling RVV instructions with same vtype first"));
+
+SUnit *RISCVPreRAMachineSchedStrategy::pickNode(bool &IsTopNode) {
+ if (EnableScheduleSameVType) {
+ for (SUnit *SU : Bot.Available) {
+ MachineInstr *MI = SU->getInstr();
+ const MCInstrDesc &Desc = MI->getDesc();
+ if (RISCVII::hasSEWOp(Desc.TSFlags)) {
+ unsigned CurVSEW = MI->getOperand(RISCVII::getSEWOpNum(Desc)).getImm();
+ RISCVII::VLMUL CurVLMUL = RISCVII::getLMul(Desc.TSFlags);
+ if (CurVSEW == PrevVSEW && CurVLMUL == PrevVLMUL) {
+ Bot.removeReady(SU);
+ IsTopNode = true;
+ return SU;
+ }
+ }
+ }
+ for (SUnit *SU : Bot.Pending) {
+ MachineInstr *MI = SU->getInstr();
+ const MCInstrDesc &Desc = MI->getDesc();
+ if (RISCVII::hasSEWOp(Desc.TSFlags)) {
+ unsigned CurVSEW = MI->getOperand(RISCVII::getSEWOpNum(Desc)).getImm();
+ RISCVII::VLMUL CurVLMUL = RISCVII::getLMul(Desc.TSFlags);
+ if (CurVSEW == PrevVSEW && CurVLMUL == PrevVLMUL) {
+ Bot.removeReady(SU);
+ IsTopNode = false;
+ return SU;
+ }
+ }
+ }
+ }
+ return GenericScheduler::pickNode(IsTopNode);
+}
+
+bool RISCVPreRAMachineSchedStrategy::tryCandidate(SchedCandidate &Cand,
+ SchedCandidate &TryCand,
+ SchedBoundary *Zone) const {
+ bool OriginalResult = GenericScheduler::tryCandidate(Cand, TryCand, Zone);
+
+ return OriginalResult;
+}
+
+void RISCVPreRAMachineSchedStrategy::schedNode(SUnit *SU, bool IsTopNode) {
+ GenericScheduler::schedNode(SU, IsTopNode);
+ MachineInstr *MI = SU->getInstr();
+ const MCInstrDesc &Desc = MI->getDesc();
+ if (RISCVII::hasSEWOp(Desc.TSFlags)) {
+ PrevVSEW = MI->getOperand(RISCVII::getSEWOpNum(Desc)).getImm();
+ PrevVLMUL = RISCVII::getLMul(Desc.TSFlags);
+ }
+ LLVM_DEBUG(dbgs() << "Previous scheduled Unit: ";
+ dbgs() << "SU(" << SU->NodeNum << ") - "; SU->getInstr()->dump(););
+ LLVM_DEBUG(dbgs() << "Previous VSEW : " << (1 << PrevVSEW) << "\n";
+ auto LMUL = RISCVVType::decodeVLMUL(PrevVLMUL);
+ dbgs() << "Previous VLMUL: m" << (LMUL.second ? "f" : "")
+ << LMUL.first << "\n";);
+}
diff --git a/llvm/lib/Target/RISCV/RISCVMachineScheduler.h b/llvm/lib/Target/RISCV/RISCVMachineScheduler.h
new file mode 100644
index 0000000000000..bd806cef57dcb
--- /dev/null
+++ b/llvm/lib/Target/RISCV/RISCVMachineScheduler.h
@@ -0,0 +1,42 @@
+//===--- RISCVMachineScheduler.h - Custom RISC-V MI scheduler ---*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// Custom RISC-V MI scheduler.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_RISCV_RISCVMACHINESCHEDULER_H
+#define LLVM_LIB_TARGET_RISCV_RISCVMACHINESCHEDULER_H
+
+#include "llvm/CodeGen/MachineScheduler.h"
+#include "llvm/TargetParser/RISCVTargetParser.h"
+
+namespace llvm {
+
+/// A GenericScheduler implementation for RISCV pre RA scheduling.
+class RISCVPreRAMachineSchedStrategy : public GenericScheduler {
+private:
+ RISCVII::VLMUL PrevVLMUL;
+ unsigned PrevVSEW;
+
+public:
+ RISCVPreRAMachineSchedStrategy(const MachineSchedContext *C)
+ : GenericScheduler(C) {}
+
+protected:
+ SUnit *pickNode(bool &IsTopNode) override;
+
+ bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand,
+ SchedBoundary *Zone) const override;
+
+ void schedNode(SUnit *SU, bool IsTopNode) override;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
index 35d0b3408d09f..e0dcbbddc3f53 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -14,6 +14,7 @@
#include "MCTargetDesc/RISCVBaseInfo.h"
#include "RISCV.h"
#include "RISCVMachineFunctionInfo.h"
+#include "RISCVMachineScheduler.h"
#include "RISCVTargetObjectFile.h"
#include "RISCVTargetTransformInfo.h"
#include "TargetInfo/RISCVTargetInfo.h"
@@ -340,12 +341,11 @@ class RISCVPassConfig : public TargetPassConfig {
ScheduleDAGInstrs *
createMachineScheduler(MachineSchedContext *C) const override {
- ScheduleDAGMILive *DAG = nullptr;
- if (EnableMISchedLoadClustering) {
- DAG = createGenericSchedLive(C);
+ ScheduleDAGMILive *DAG =
+ createGenericSchedLive<RISCVPreRAMachineSchedStrategy>(C);
+ if (EnableMISchedLoadClustering)
DAG->addMutation(createLoadClusterDAGMutation(
DAG->TII, DAG->TRI, /*ReorderWhileClustering=*/true));
- }
return DAG;
}
diff --git a/llvm/test/CodeGen/RISCV/rvv/schedule.ll b/llvm/test/CodeGen/RISCV/rvv/schedule.ll
new file mode 100644
index 0000000000000..baf15ef400df5
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/schedule.ll
@@ -0,0 +1,49 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=riscv64 -mcpu=sifive-x280 -verify-machineinstrs < %s \
+; RUN: | FileCheck %s --check-prefix=DEFAULT
+; RUN: llc -mtriple=riscv64 -mcpu=sifive-x280 -riscv-enable-schedule-same-vtype -verify-machineinstrs < %s \
+; RUN: | FileCheck %s --check-prefix=SAME-VTYPE-FIRST
+
+define <vscale x 1 x i64> @test(<vscale x 1 x i64> %v64_0, <vscale x 1 x i64> %v64_1, <vscale x 1 x i32> %v32_0, <vscale x 1 x i32> %v32_1) {
+; DEFAULT-LABEL: test:
+; DEFAULT: # %bb.0: # %entry
+; DEFAULT-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; DEFAULT-NEXT: vdiv.vv v12, v8, v9
+; DEFAULT-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; DEFAULT-NEXT: vdiv.vv v13, v10, v11
+; DEFAULT-NEXT: vsetvli zero, zero, e64, m1, ta, ma
+; DEFAULT-NEXT: vadd.vv v8, v8, v9
+; DEFAULT-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; DEFAULT-NEXT: vadd.vv v9, v10, v11
+; DEFAULT-NEXT: vsetvli zero, zero, e64, m1, ta, ma
+; DEFAULT-NEXT: vadd.vv v8, v8, v12
+; DEFAULT-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; DEFAULT-NEXT: vadd.vv v9, v9, v13
+; DEFAULT-NEXT: vwadd.wv v8, v8, v9
+; DEFAULT-NEXT: ret
+;
+; SAME-VTYPE-FIRST-LABEL: test:
+; SAME-VTYPE-FIRST: # %bb.0: # %entry
+; SAME-VTYPE-FIRST-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; SAME-VTYPE-FIRST-NEXT: vadd.vv v12, v8, v9
+; SAME-VTYPE-FIRST-NEXT: vdiv.vv v8, v8, v9
+; SAME-VTYPE-FIRST-NEXT: vadd.vv v8, v12, v8
+; SAME-VTYPE-FIRST-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; SAME-VTYPE-FIRST-NEXT: vadd.vv v9, v10, v11
+; SAME-VTYPE-FIRST-NEXT: vdiv.vv v10, v10, v11
+; SAME-VTYPE-FIRST-NEXT: vadd.vv v9, v9, v10
+; SAME-VTYPE-FIRST-NEXT: vwadd.wv v8, v8, v9
+; SAME-VTYPE-FIRST-NEXT: ret
+entry:
+ %0 = add <vscale x 1 x i64> %v64_0, %v64_1
+ %1 = add <vscale x 1 x i32> %v32_0, %v32_1
+ %2 = sdiv <vscale x 1 x i64> %v64_0, %v64_1
+ %3 = sdiv <vscale x 1 x i32> %v32_0, %v32_1
+ %4 = add <vscale x 1 x i64> %0, %2
+ %5 = add <vscale x 1 x i32> %1, %3
+
+ %6 = sext <vscale x 1 x i32> %5 to <vscale x 1 x i64>
+ %7 = add <vscale x 1 x i64> %4, %6
+ ret <vscale x 1 x i64> %7
+}
+
>From 185e0f8266c2dec9a161328c6c14490fe3cffa69 Mon Sep 17 00:00:00 2001
From: Wang Pengcheng <wangpengcheng.pp at bytedance.com>
Date: Wed, 19 Jun 2024 18:45:50 +0800
Subject: [PATCH 2/2] Support bottomup/topdown/bidirectional and fix some
failures
Created using spr 1.3.6-beta.1
---
.../Target/RISCV/RISCVMachineScheduler.cpp | 79 ++++++++---
llvm/test/CodeGen/RISCV/rvv/schedule.ll | 125 +++++++++++++++---
2 files changed, 165 insertions(+), 39 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVMachineScheduler.cpp b/llvm/lib/Target/RISCV/RISCVMachineScheduler.cpp
index d993d840c3d3a..530d4f6b2d845 100644
--- a/llvm/lib/Target/RISCV/RISCVMachineScheduler.cpp
+++ b/llvm/lib/Target/RISCV/RISCVMachineScheduler.cpp
@@ -27,34 +27,68 @@ static cl::opt<bool> EnableScheduleSameVType(
cl::desc("Enable scheduling RVV instructions with same vtype first"));
SUnit *RISCVPreRAMachineSchedStrategy::pickNode(bool &IsTopNode) {
- if (EnableScheduleSameVType) {
- for (SUnit *SU : Bot.Available) {
+ auto FindPotentialRVVInstructionInQueue =
+ [&](SchedBoundary &Boundary, ReadyQueue Q, bool ShouldBeTop) -> SUnit * {
+ for (SUnit *SU : Q) {
+ if (SU->isScheduled)
+ continue;
+
MachineInstr *MI = SU->getInstr();
const MCInstrDesc &Desc = MI->getDesc();
if (RISCVII::hasSEWOp(Desc.TSFlags)) {
unsigned CurVSEW = MI->getOperand(RISCVII::getSEWOpNum(Desc)).getImm();
RISCVII::VLMUL CurVLMUL = RISCVII::getLMul(Desc.TSFlags);
+ // FIXME: We should consider vl and policy here.
if (CurVSEW == PrevVSEW && CurVLMUL == PrevVLMUL) {
- Bot.removeReady(SU);
- IsTopNode = true;
+ IsTopNode = ShouldBeTop;
+ // Boundary.removeReady(SU);
+ if (SU->isTopReady())
+ Top.removeReady(SU);
+ if (SU->isBottomReady())
+ Bot.removeReady(SU);
+ LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
+ << *SU->getInstr());
return SU;
}
}
}
- for (SUnit *SU : Bot.Pending) {
- MachineInstr *MI = SU->getInstr();
- const MCInstrDesc &Desc = MI->getDesc();
- if (RISCVII::hasSEWOp(Desc.TSFlags)) {
- unsigned CurVSEW = MI->getOperand(RISCVII::getSEWOpNum(Desc)).getImm();
- RISCVII::VLMUL CurVLMUL = RISCVII::getLMul(Desc.TSFlags);
- if (CurVSEW == PrevVSEW && CurVLMUL == PrevVLMUL) {
- Bot.removeReady(SU);
- IsTopNode = false;
- return SU;
- }
- }
+ return nullptr;
+ };
+
+ auto FindPotentialRVVInstruction = [&](SchedBoundary &Boundary,
+ bool ShouldBeTop) -> SUnit * {
+ if (SUnit *Available = FindPotentialRVVInstructionInQueue(
+ Boundary, Boundary.Available, ShouldBeTop))
+ return Available;
+ if (SUnit *Pending = FindPotentialRVVInstructionInQueue(
+ Boundary, Boundary.Pending, ShouldBeTop))
+ return Pending;
+ return nullptr;
+ };
+
+ if (EnableScheduleSameVType) {
+ if (RegionPolicy.OnlyBottomUp) {
+ if (SUnit *SU = FindPotentialRVVInstruction(Bot, false))
+ return SU;
+ } else if (RegionPolicy.OnlyTopDown) {
+ if (SUnit *SU = FindPotentialRVVInstruction(Top, true))
+ return SU;
+ } else {
+ if (SUnit *SU =
+ FindPotentialRVVInstructionInQueue(Bot, Bot.Available, false))
+ return SU;
+ if (SUnit *SU =
+ FindPotentialRVVInstructionInQueue(Top, Top.Available, true))
+ return SU;
+ if (SUnit *SU =
+ FindPotentialRVVInstructionInQueue(Bot, Bot.Pending, false))
+ return SU;
+ if (SUnit *SU =
+ FindPotentialRVVInstructionInQueue(Top, Top.Pending, true))
+ return SU;
}
}
+
return GenericScheduler::pickNode(IsTopNode);
}
@@ -73,11 +107,12 @@ void RISCVPreRAMachineSchedStrategy::schedNode(SUnit *SU, bool IsTopNode) {
if (RISCVII::hasSEWOp(Desc.TSFlags)) {
PrevVSEW = MI->getOperand(RISCVII::getSEWOpNum(Desc)).getImm();
PrevVLMUL = RISCVII::getLMul(Desc.TSFlags);
+ LLVM_DEBUG(dbgs() << "Previous scheduled Unit: ";
+ dbgs() << "SU(" << SU->NodeNum << ") - ";
+ SU->getInstr()->dump(););
+ LLVM_DEBUG(dbgs() << "Previous VSEW : " << (1 << PrevVSEW) << "\n";
+ auto LMUL = RISCVVType::decodeVLMUL(PrevVLMUL);
+ dbgs() << "Previous VLMUL: m" << (LMUL.second ? "f" : "")
+ << LMUL.first << "\n";);
}
- LLVM_DEBUG(dbgs() << "Previous scheduled Unit: ";
- dbgs() << "SU(" << SU->NodeNum << ") - "; SU->getInstr()->dump(););
- LLVM_DEBUG(dbgs() << "Previous VSEW : " << (1 << PrevVSEW) << "\n";
- auto LMUL = RISCVVType::decodeVLMUL(PrevVLMUL);
- dbgs() << "Previous VLMUL: m" << (LMUL.second ? "f" : "")
- << LMUL.first << "\n";);
}
diff --git a/llvm/test/CodeGen/RISCV/rvv/schedule.ll b/llvm/test/CodeGen/RISCV/rvv/schedule.ll
index baf15ef400df5..6b466d802ac4a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/schedule.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/schedule.ll
@@ -1,15 +1,33 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=riscv64 -mcpu=sifive-x280 -verify-machineinstrs < %s \
; RUN: | FileCheck %s --check-prefix=DEFAULT
-; RUN: llc -mtriple=riscv64 -mcpu=sifive-x280 -riscv-enable-schedule-same-vtype -verify-machineinstrs < %s \
-; RUN: | FileCheck %s --check-prefix=SAME-VTYPE-FIRST
+; RUN: llc -mtriple=riscv64 -mcpu=sifive-x280 -misched-bottomup=true -misched-topdown=false \
+; RUN: -riscv-enable-schedule-same-vtype -verify-machineinstrs < %s \
+; RUN: | FileCheck %s --check-prefix=SAME-VTYPE-FIRST-BOTTOMUP
+; RUN: llc -mtriple=riscv64 -mcpu=sifive-x280 -misched-bottomup=false -misched-topdown=true \
+; RUN: -riscv-enable-schedule-same-vtype -verify-machineinstrs < %s \
+; RUN: | FileCheck %s --check-prefix=SAME-VTYPE-FIRST-TOPDOWN
+; RUN: llc -mtriple=riscv64 -mcpu=sifive-x280 -misched-bottomup=false -misched-topdown=false \
+; RUN: -riscv-enable-schedule-same-vtype -verify-machineinstrs < %s \
+; RUN: | FileCheck %s --check-prefix=SAME-VTYPE-FIRST-BIDIRECTIONAL
-define <vscale x 1 x i64> @test(<vscale x 1 x i64> %v64_0, <vscale x 1 x i64> %v64_1, <vscale x 1 x i32> %v32_0, <vscale x 1 x i32> %v32_1) {
+declare void @consume(i64 %scalar, <vscale x 1 x i64> %vector)
+
+define void @test(i64 %a, i64 %b, <vscale x 1 x i64> %v64_0, <vscale x 1 x i64> %v64_1, <vscale x 1 x i32> %v32_0, <vscale x 1 x i32> %v32_1) {
; DEFAULT-LABEL: test:
; DEFAULT: # %bb.0: # %entry
-; DEFAULT-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; DEFAULT-NEXT: addi sp, sp, -16
+; DEFAULT-NEXT: .cfi_def_cfa_offset 16
+; DEFAULT-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; DEFAULT-NEXT: .cfi_offset ra, -8
+; DEFAULT-NEXT: vsetvli a2, zero, e64, m1, ta, ma
; DEFAULT-NEXT: vdiv.vv v12, v8, v9
; DEFAULT-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; DEFAULT-NEXT: div a2, a0, a1
+; DEFAULT-NEXT: add a3, a0, a1
+; DEFAULT-NEXT: mul a0, a0, a1
+; DEFAULT-NEXT: add a0, a0, a3
+; DEFAULT-NEXT: add a0, a0, a2
; DEFAULT-NEXT: vdiv.vv v13, v10, v11
; DEFAULT-NEXT: vsetvli zero, zero, e64, m1, ta, ma
; DEFAULT-NEXT: vadd.vv v8, v8, v9
@@ -20,30 +38,103 @@ define <vscale x 1 x i64> @test(<vscale x 1 x i64> %v64_0, <vscale x 1 x i64> %v
; DEFAULT-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
; DEFAULT-NEXT: vadd.vv v9, v9, v13
; DEFAULT-NEXT: vwadd.wv v8, v8, v9
+; DEFAULT-NEXT: call consume
+; DEFAULT-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; DEFAULT-NEXT: addi sp, sp, 16
; DEFAULT-NEXT: ret
;
-; SAME-VTYPE-FIRST-LABEL: test:
-; SAME-VTYPE-FIRST: # %bb.0: # %entry
-; SAME-VTYPE-FIRST-NEXT: vsetvli a0, zero, e64, m1, ta, ma
-; SAME-VTYPE-FIRST-NEXT: vadd.vv v12, v8, v9
-; SAME-VTYPE-FIRST-NEXT: vdiv.vv v8, v8, v9
-; SAME-VTYPE-FIRST-NEXT: vadd.vv v8, v12, v8
-; SAME-VTYPE-FIRST-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; SAME-VTYPE-FIRST-NEXT: vadd.vv v9, v10, v11
-; SAME-VTYPE-FIRST-NEXT: vdiv.vv v10, v10, v11
-; SAME-VTYPE-FIRST-NEXT: vadd.vv v9, v9, v10
-; SAME-VTYPE-FIRST-NEXT: vwadd.wv v8, v8, v9
-; SAME-VTYPE-FIRST-NEXT: ret
+; SAME-VTYPE-FIRST-BOTTOMUP-LABEL: test:
+; SAME-VTYPE-FIRST-BOTTOMUP: # %bb.0: # %entry
+; SAME-VTYPE-FIRST-BOTTOMUP-NEXT: addi sp, sp, -16
+; SAME-VTYPE-FIRST-BOTTOMUP-NEXT: .cfi_def_cfa_offset 16
+; SAME-VTYPE-FIRST-BOTTOMUP-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; SAME-VTYPE-FIRST-BOTTOMUP-NEXT: .cfi_offset ra, -8
+; SAME-VTYPE-FIRST-BOTTOMUP-NEXT: vsetvli a2, zero, e64, m1, ta, ma
+; SAME-VTYPE-FIRST-BOTTOMUP-NEXT: vadd.vv v12, v8, v9
+; SAME-VTYPE-FIRST-BOTTOMUP-NEXT: div a2, a0, a1
+; SAME-VTYPE-FIRST-BOTTOMUP-NEXT: add a3, a0, a1
+; SAME-VTYPE-FIRST-BOTTOMUP-NEXT: vdiv.vv v8, v8, v9
+; SAME-VTYPE-FIRST-BOTTOMUP-NEXT: mul a0, a0, a1
+; SAME-VTYPE-FIRST-BOTTOMUP-NEXT: add a0, a0, a3
+; SAME-VTYPE-FIRST-BOTTOMUP-NEXT: add a0, a0, a2
+; SAME-VTYPE-FIRST-BOTTOMUP-NEXT: vadd.vv v8, v12, v8
+; SAME-VTYPE-FIRST-BOTTOMUP-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; SAME-VTYPE-FIRST-BOTTOMUP-NEXT: vadd.vv v9, v10, v11
+; SAME-VTYPE-FIRST-BOTTOMUP-NEXT: vdiv.vv v10, v10, v11
+; SAME-VTYPE-FIRST-BOTTOMUP-NEXT: vadd.vv v9, v9, v10
+; SAME-VTYPE-FIRST-BOTTOMUP-NEXT: vwadd.wv v8, v8, v9
+; SAME-VTYPE-FIRST-BOTTOMUP-NEXT: call consume
+; SAME-VTYPE-FIRST-BOTTOMUP-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; SAME-VTYPE-FIRST-BOTTOMUP-NEXT: addi sp, sp, 16
+; SAME-VTYPE-FIRST-BOTTOMUP-NEXT: ret
+;
+; SAME-VTYPE-FIRST-TOPDOWN-LABEL: test:
+; SAME-VTYPE-FIRST-TOPDOWN: # %bb.0: # %entry
+; SAME-VTYPE-FIRST-TOPDOWN-NEXT: addi sp, sp, -16
+; SAME-VTYPE-FIRST-TOPDOWN-NEXT: .cfi_def_cfa_offset 16
+; SAME-VTYPE-FIRST-TOPDOWN-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; SAME-VTYPE-FIRST-TOPDOWN-NEXT: .cfi_offset ra, -8
+; SAME-VTYPE-FIRST-TOPDOWN-NEXT: vsetvli a3, zero, e32, mf2, ta, ma
+; SAME-VTYPE-FIRST-TOPDOWN-NEXT: vadd.vv v12, v10, v11
+; SAME-VTYPE-FIRST-TOPDOWN-NEXT: vdiv.vv v10, v10, v11
+; SAME-VTYPE-FIRST-TOPDOWN-NEXT: add a2, a0, a1
+; SAME-VTYPE-FIRST-TOPDOWN-NEXT: mul a3, a0, a1
+; SAME-VTYPE-FIRST-TOPDOWN-NEXT: div a0, a0, a1
+; SAME-VTYPE-FIRST-TOPDOWN-NEXT: add a2, a2, a3
+; SAME-VTYPE-FIRST-TOPDOWN-NEXT: vadd.vv v10, v12, v10
+; SAME-VTYPE-FIRST-TOPDOWN-NEXT: vsetvli zero, zero, e64, m1, ta, ma
+; SAME-VTYPE-FIRST-TOPDOWN-NEXT: vadd.vv v11, v8, v9
+; SAME-VTYPE-FIRST-TOPDOWN-NEXT: vdiv.vv v8, v8, v9
+; SAME-VTYPE-FIRST-TOPDOWN-NEXT: add a0, a0, a2
+; SAME-VTYPE-FIRST-TOPDOWN-NEXT: vadd.vv v8, v11, v8
+; SAME-VTYPE-FIRST-TOPDOWN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; SAME-VTYPE-FIRST-TOPDOWN-NEXT: vwadd.wv v8, v8, v10
+; SAME-VTYPE-FIRST-TOPDOWN-NEXT: call consume
+; SAME-VTYPE-FIRST-TOPDOWN-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; SAME-VTYPE-FIRST-TOPDOWN-NEXT: addi sp, sp, 16
+; SAME-VTYPE-FIRST-TOPDOWN-NEXT: ret
+;
+; SAME-VTYPE-FIRST-BIDIRECTIONAL-LABEL: test:
+; SAME-VTYPE-FIRST-BIDIRECTIONAL: # %bb.0: # %entry
+; SAME-VTYPE-FIRST-BIDIRECTIONAL-NEXT: addi sp, sp, -16
+; SAME-VTYPE-FIRST-BIDIRECTIONAL-NEXT: .cfi_def_cfa_offset 16
+; SAME-VTYPE-FIRST-BIDIRECTIONAL-NEXT: sd ra, 8(sp) # 8-byte Folded Spill
+; SAME-VTYPE-FIRST-BIDIRECTIONAL-NEXT: .cfi_offset ra, -8
+; SAME-VTYPE-FIRST-BIDIRECTIONAL-NEXT: vsetvli a2, zero, e32, mf2, ta, ma
+; SAME-VTYPE-FIRST-BIDIRECTIONAL-NEXT: vadd.vv v12, v10, v11
+; SAME-VTYPE-FIRST-BIDIRECTIONAL-NEXT: vdiv.vv v10, v10, v11
+; SAME-VTYPE-FIRST-BIDIRECTIONAL-NEXT: vsetvli zero, zero, e64, m1, ta, ma
+; SAME-VTYPE-FIRST-BIDIRECTIONAL-NEXT: div a2, a0, a1
+; SAME-VTYPE-FIRST-BIDIRECTIONAL-NEXT: add a3, a0, a1
+; SAME-VTYPE-FIRST-BIDIRECTIONAL-NEXT: mul a0, a0, a1
+; SAME-VTYPE-FIRST-BIDIRECTIONAL-NEXT: add a0, a0, a3
+; SAME-VTYPE-FIRST-BIDIRECTIONAL-NEXT: vadd.vv v11, v8, v9
+; SAME-VTYPE-FIRST-BIDIRECTIONAL-NEXT: vdiv.vv v8, v8, v9
+; SAME-VTYPE-FIRST-BIDIRECTIONAL-NEXT: add a0, a0, a2
+; SAME-VTYPE-FIRST-BIDIRECTIONAL-NEXT: vadd.vv v8, v11, v8
+; SAME-VTYPE-FIRST-BIDIRECTIONAL-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; SAME-VTYPE-FIRST-BIDIRECTIONAL-NEXT: vadd.vv v9, v12, v10
+; SAME-VTYPE-FIRST-BIDIRECTIONAL-NEXT: vwadd.wv v8, v8, v9
+; SAME-VTYPE-FIRST-BIDIRECTIONAL-NEXT: call consume
+; SAME-VTYPE-FIRST-BIDIRECTIONAL-NEXT: ld ra, 8(sp) # 8-byte Folded Reload
+; SAME-VTYPE-FIRST-BIDIRECTIONAL-NEXT: addi sp, sp, 16
+; SAME-VTYPE-FIRST-BIDIRECTIONAL-NEXT: ret
entry:
%0 = add <vscale x 1 x i64> %v64_0, %v64_1
+ %scalar0 = add i64 %a, %b
%1 = add <vscale x 1 x i32> %v32_0, %v32_1
%2 = sdiv <vscale x 1 x i64> %v64_0, %v64_1
+ %scalar1 = mul i64 %a, %b
%3 = sdiv <vscale x 1 x i32> %v32_0, %v32_1
%4 = add <vscale x 1 x i64> %0, %2
+ %scalar2 = sdiv i64 %a, %b
%5 = add <vscale x 1 x i32> %1, %3
%6 = sext <vscale x 1 x i32> %5 to <vscale x 1 x i64>
+ %scalar3 = add i64 %scalar0, %scalar1
%7 = add <vscale x 1 x i64> %4, %6
- ret <vscale x 1 x i64> %7
+ %scalar4 = add i64 %scalar2, %scalar3
+ call void @consume(i64 %scalar4, <vscale x 1 x i64> %7)
+ ret void
}
More information about the llvm-commits
mailing list