[llvm] [llvm][CodeGen] Add a new software pipeliner 'Window Scheduler' (PR #84443)
Hua Tian via llvm-commits
llvm-commits at lists.llvm.org
Sun Apr 7 00:42:57 PDT 2024
================
@@ -0,0 +1,692 @@
+//======----------- WindowScheduler.cpp - window scheduler -------------======//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// An implementation of the Window Scheduling software pipelining algorithm.
+//
+// The fundamental concept of the window scheduling algorithm involves folding
+// the original MBB at a specific position, followed by list scheduling on the
+// folded MIs. The optimal scheduling result is then chosen from various folding
+// positions as the final scheduling outcome.
+//
+// The primary challenge in this algorithm lies in generating the folded MIs and
+// establishing their dependencies. We have innovatively employed a new MBB,
+// created by copying the original MBB three times, known as TripleMBB. This
+// TripleMBB enables the convenient implementation of MI folding and dependency
+// establishment. To facilitate the algorithm's implementation, we have also
+// devised data structures such as OriMIs, TriMIs, TriToOri, and OriToCycle.
+//
+// Another challenge in the algorithm is the scheduling of phis. Semantically,
+// it is difficult to place the phis in the window and perform list scheduling.
+// Therefore, we schedule these phis separately after each list scheduling.
+//
+// The provided implementation is designed for use before the Register Allocator
+// (RA). If the target requires implementation after RA, it is recommended to
+// reimplement analyseII(), schedulePhi(), and expand(). Additionally,
+// target-specific logic can be added in initialize(), preProcess(), and
+// postProcess().
+//
+// Lastly, it is worth mentioning that getSearchIndexes() is an important
+// function. We have experimented with more complex heuristics on a downstream
+// target and achieved favorable results.
+//
+//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/WindowScheduler.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachinePipeliner.h"
+#include "llvm/CodeGen/ModuloSchedule.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/TimeProfiler.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "pipeliner"
+
+// Counters describing how often window scheduling was attempted, how many
+// list-scheduling searches it ran, and how it ended.
+STATISTIC(NumTryWindowSchedule,
+          "Number of loops that we attempt to use window scheduling");
+STATISTIC(NumTryWindowSearch,
+          "Number of times that we run list schedule in the window scheduling");
+STATISTIC(NumWindowSchedule,
+          "Number of loops that we successfully use window scheduling");
+STATISTIC(NumFailAnalyseII,
+          "Window scheduling abort due to the failure of the II analysis");
+
+// Cap on the number of window positions tried per loop; forwarded to
+// getSearchIndexes() by run().
+static cl::opt<unsigned> WindowSearchNum(
+    "window-search-num", cl::init(6), cl::Hidden,
+    cl::desc("The number of searches per loop in the window "
+             "algorithm. 0 means no search number limit."));
+
+// Percentage of the loop's positions that may be searched; forwarded to
+// getSearchIndexes() by run().
+static cl::opt<unsigned> WindowSearchRatio(
+    "window-search-ratio", cl::init(40), cl::Hidden,
+    cl::desc("The ratio of searches per loop in the window algorithm. 100 "
+             "means search all positions in the loop, while 0 means not "
+             "performing any search."));
+
+// Coefficient applied when the initial II is set up.
+static cl::opt<unsigned> WindowIICoeff(
+    "window-ii-coeff", cl::init(5), cl::Hidden,
+    cl::desc(
+        "The coefficient used when initializing II in the window algorithm."));
+
+// Lower limit of the scheduling region.
+static cl::opt<unsigned> WindowRegionLimit(
+    "window-region-limit", cl::init(3), cl::Hidden,
+    cl::desc(
+        "The lower limit of the scheduling region in the window algorithm."));
+
+// Minimum gap between the best II and the base II for the result to be used.
+static cl::opt<unsigned> WindowDiffLimit(
+    "window-diff-limit", cl::init(2), cl::Hidden,
+    cl::desc("The lower limit of the difference between best II and base II in "
+             "the window algorithm. If the difference is smaller than "
+             "this lower limit, window scheduling will not be performed."));
+
+// WindowIILimit doubles as the "no valid II" marker: run() treats an II equal
+// to this value as a failed search. It is intentionally left with external
+// linkage so that a derived, target-specific window scheduler can refer to it.
+cl::opt<unsigned>
+    WindowIILimit("window-ii-limit", cl::init(1000), cl::Hidden,
+                  cl::desc("The upper limit of II in the window algorithm."));
+
+/// Binds the scheduler to the loop \p ML (working on its header block) within
+/// the function recorded in the scheduling context \p C, and caches the
+/// target hooks and register info that the algorithm consults.
+WindowScheduler::WindowScheduler(MachineSchedContext *C, MachineLoop &ML)
+    : Context(C), MF(C->MF), MBB(ML.getHeader()), Loop(ML) {
+  // Function-level state.
+  MRI = &MF->getRegInfo();
+  // Target-level state, all derived from the subtarget.
+  Subtarget = &MF->getSubtarget();
+  TII = Subtarget->getInstrInfo();
+  TRI = Subtarget->getRegisterInfo();
+  // This DAG is only used to build the dependency graph.
+  // NOTE(review): if createMachineScheduler() is virtual, a call made from
+  // the constructor cannot reach an override in a derived class - confirm
+  // that this is intended.
+  TripleDAG.reset(createMachineScheduler(/*OnlyBuildGraph=*/true));
+}
+
+/// Drives the whole window scheduling flow for the loop: initialization, the
+/// search over candidate window positions, validation of the best result and
+/// its expansion.
+///
+/// \returns true if a schedule was accepted and expand() was run; false if
+/// initialization failed or isScheduleValid() rejected the result.
+bool WindowScheduler::run() {
+  if (!initialize()) {
+    LLVM_DEBUG(dbgs() << "The WindowScheduler failed to initialize!\n");
+    return false;
+  }
+  // The search below is expensive, so it is made visible in time traces.
+  TimeTraceScope Scope("WindowSearch");
+  ++NumTryWindowSchedule;
+  // Hook that runs before the search.
+  preProcess();
+
+  // One list scheduler instance is reused for every candidate window.
+  std::unique_ptr<ScheduleDAGInstrs> ListDAG(createMachineScheduler());
+  auto Candidates = getSearchIndexes(WindowSearchNum, WindowSearchRatio);
+  for (unsigned Candidate : Candidates) {
+    OriToCycle.clear();
+    ++NumTryWindowSearch;
+    // Candidates count from the first non-phi instruction, so skip the phis.
+    unsigned Offset = Candidate + SchedPhiNum;
+    auto Region = getScheduleRange(Offset, SchedInstrNum);
+    ListDAG->startBlock(MBB);
+    ListDAG->enterRegion(MBB, Region.begin(), Region.end(), SchedInstrNum);
+    ListDAG->schedule();
+    LLVM_DEBUG(ListDAG->dump());
+
+    unsigned II = analyseII(*ListDAG, Offset);
+    if (II != WindowIILimit) {
+      // A usable II was found: place the phis, record the result, and undo
+      // the changes made to the triple MBB before trying the next window.
+      schedulePhi(Offset, II);
+      updateScheduleResult(Offset, II);
+      restoreTripleMBB();
+      LLVM_DEBUG(dbgs() << "Current window Offset is " << Offset
+                        << " and II is " << II << ".\n");
+      continue;
+    }
+    // An II equal to WindowIILimit marks this window as a failure.
+    restoreTripleMBB();
+    LLVM_DEBUG(dbgs() << "Can't find a valid II. Keep searching...\n");
+    ++NumFailAnalyseII;
+  }
+
+  // Hook that runs after the search.
+  postProcess();
+  if (isScheduleValid()) {
+    LLVM_DEBUG(dbgs() << "\nBest window offset is " << BestOffset
+                      << " and Best II is " << BestII << ".\n");
+    // Turn the chosen schedule into prologue, kernel, and epilogue.
+    expand();
+    ++NumWindowSchedule;
+    return true;
+  }
+  LLVM_DEBUG(dbgs() << "Window scheduling is not needed!\n");
+  return false;
+}
+
+/// Creates the scheduler DAG used by the window algorithm.
+///
+/// \param OnlyBuildGraph when true, a ScheduleDAGMI driven by
+/// PostGenericScheduler is created; otherwise the scheduler is obtained from
+/// the pass configuration stored in the context.
+/// \returns a raw pointer to the scheduler. The ScheduleDAGMI branch
+/// heap-allocates it, and the callers in this file wrap the result in a
+/// std::unique_ptr.
+ScheduleDAGInstrs *
+WindowScheduler::createMachineScheduler(bool OnlyBuildGraph) {
+  if (!OnlyBuildGraph)
+    return Context->PassConfig->createMachineScheduler(Context);
+
+  return new ScheduleDAGMI(Context,
+                           std::make_unique<PostGenericScheduler>(Context),
+                           /*RemoveKillFlags=*/true);
+}
+
+bool WindowScheduler::initialize() {
+ if (!Subtarget->enableWindowScheduler()) {
+ LLVM_DEBUG(dbgs() << "Target disables the window scheduling!\n");
+ return false;
+ }
+ // Initialized the member variables used by window algorithm.
+ OriMIs.clear();
+ TriMIs.clear();
+ TriToOri.clear();
+ OriToCycle.clear();
+ SchedResult.clear();
+ SchedPhiNum = 0;
+ SchedInstrNum = 0;
+ BestII = UINT_MAX;
+ BestOffset = 0;
+ BaseII = 0;
+ // List scheduling used in the window algorithm depends on LiveIntervals.
+ if (!Context->LIS) {
+ LLVM_DEBUG(dbgs() << "There is no LiveIntervals information!\n");
+ return false;
+ }
+ // Check each MI in MBB.
+ SmallVector<Register, 8> PhiDefs;
+ auto PLI = TII->analyzeLoopForPipelining(MBB);
+ for (auto &MI : *MBB) {
+ if (MI.isDebugInstr() || MI.isTerminator())
+ continue;
+ if (MI.isPHI()) {
+ for (auto Def : PhiDefs)
+ if (MI.readsRegister(Def, TRI)) {
+ LLVM_DEBUG(
+ dbgs()
+ << "Consecutive phis are not allowed in window scheduling!\n");
+ return false;
----------------
huaatian wrote:
This code is used to handle the loop-carried case, for example:
![image](https://github.com/llvm/llvm-project/assets/142874007/f500cf1e-e221-4981-8512-ff24aedde969)
https://github.com/llvm/llvm-project/pull/84443
More information about the llvm-commits
mailing list