[llvm] [CodeGen][MISched] Add misched post-regalloc bottom-up scheduling (PR #76186)

Michael Maitland via llvm-commits llvm-commits at lists.llvm.org
Tue Jan 16 10:05:45 PST 2024


https://github.com/michaelmaitland updated https://github.com/llvm/llvm-project/pull/76186

>From 2f358af5af9d52ece84aa8fb2112126e0995b3e4 Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Thu, 21 Dec 2023 13:16:49 -0800
Subject: [PATCH 1/4] [RISCV] SiFive7 uses postra scheduler

---
 llvm/lib/Target/RISCV/RISCVSchedSiFive7.td    |  1 +
 llvm/test/CodeGen/RISCV/machine-combiner.ll   |  8 +--
 .../CodeGen/RISCV/short-forward-branch-opt.ll | 64 +++++++++----------
 3 files changed, 37 insertions(+), 36 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
index f531ab2fac8f9f..31745341fe1da1 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
@@ -198,6 +198,7 @@ def SiFive7Model : SchedMachineModel {
   let LoadLatency = 3;
   let MispredictPenalty = 3;
   let CompleteModel = 0;
+  let PostRAScheduler = true;
   let EnableIntervals = true;
   let UnsupportedFeatures = [HasStdExtZbkb, HasStdExtZbkc, HasStdExtZbkx,
                              HasStdExtZcmt, HasStdExtZknd, HasStdExtZkne,
diff --git a/llvm/test/CodeGen/RISCV/machine-combiner.ll b/llvm/test/CodeGen/RISCV/machine-combiner.ll
index 7c1792e2f101f5..cfdefec04600c8 100644
--- a/llvm/test/CodeGen/RISCV/machine-combiner.ll
+++ b/llvm/test/CodeGen/RISCV/machine-combiner.ll
@@ -1096,10 +1096,10 @@ declare double @llvm.maxnum.f64(double, double)
 define double @test_fmadd_strategy(double %a0, double %a1, double %a2, double %a3, i64 %flag) {
 ; CHECK_LOCAL-LABEL: test_fmadd_strategy:
 ; CHECK_LOCAL:       # %bb.0: # %entry
-; CHECK_LOCAL-NEXT:    fmv.d fa5, fa0
 ; CHECK_LOCAL-NEXT:    fsub.d fa4, fa0, fa1
-; CHECK_LOCAL-NEXT:    fmul.d fa0, fa4, fa2
 ; CHECK_LOCAL-NEXT:    andi a0, a0, 1
+; CHECK_LOCAL-NEXT:    fmv.d fa5, fa0
+; CHECK_LOCAL-NEXT:    fmul.d fa0, fa4, fa2
 ; CHECK_LOCAL-NEXT:    beqz a0, .LBB76_2
 ; CHECK_LOCAL-NEXT:  # %bb.1: # %entry
 ; CHECK_LOCAL-NEXT:    fmul.d fa4, fa5, fa1
@@ -1110,10 +1110,10 @@ define double @test_fmadd_strategy(double %a0, double %a1, double %a2, double %a
 ;
 ; CHECK_GLOBAL-LABEL: test_fmadd_strategy:
 ; CHECK_GLOBAL:       # %bb.0: # %entry
-; CHECK_GLOBAL-NEXT:    fmv.d fa5, fa0
 ; CHECK_GLOBAL-NEXT:    fsub.d fa4, fa0, fa1
-; CHECK_GLOBAL-NEXT:    fmul.d fa0, fa4, fa2
 ; CHECK_GLOBAL-NEXT:    andi a0, a0, 1
+; CHECK_GLOBAL-NEXT:    fmv.d fa5, fa0
+; CHECK_GLOBAL-NEXT:    fmul.d fa0, fa4, fa2
 ; CHECK_GLOBAL-NEXT:    beqz a0, .LBB76_2
 ; CHECK_GLOBAL-NEXT:  # %bb.1: # %entry
 ; CHECK_GLOBAL-NEXT:    fmul.d fa5, fa5, fa1
diff --git a/llvm/test/CodeGen/RISCV/short-forward-branch-opt.ll b/llvm/test/CodeGen/RISCV/short-forward-branch-opt.ll
index 725b8fd6eeea6b..59c14ba069195f 100644
--- a/llvm/test/CodeGen/RISCV/short-forward-branch-opt.ll
+++ b/llvm/test/CodeGen/RISCV/short-forward-branch-opt.ll
@@ -813,24 +813,24 @@ define i64 @select_sll(i64 %A, i64 %B, i64 %C, i1 zeroext %cond) {
 ; RV32SFB-NEXT:    not a7, a2
 ; RV32SFB-NEXT:    srli a0, a0, 1
 ; RV32SFB-NEXT:    sll t0, a1, a2
-; RV32SFB-NEXT:    srl a0, a0, a7
 ; RV32SFB-NEXT:    addi a2, a2, -32
+; RV32SFB-NEXT:    srl a0, a0, a7
 ; RV32SFB-NEXT:    mv a1, a3
-; RV32SFB-NEXT:    bgez a2, .LBB20_2
+; RV32SFB-NEXT:    bltz a2, .LBB20_2
 ; RV32SFB-NEXT:  # %bb.1: # %entry
-; RV32SFB-NEXT:    or a1, t0, a0
+; RV32SFB-NEXT:    li a3, 0
 ; RV32SFB-NEXT:  .LBB20_2: # %entry
-; RV32SFB-NEXT:    bltz a2, .LBB20_4
+; RV32SFB-NEXT:    bgez a2, .LBB20_4
 ; RV32SFB-NEXT:  # %bb.3: # %entry
-; RV32SFB-NEXT:    li a3, 0
+; RV32SFB-NEXT:    or a1, t0, a0
 ; RV32SFB-NEXT:  .LBB20_4: # %entry
 ; RV32SFB-NEXT:    beqz a6, .LBB20_6
 ; RV32SFB-NEXT:  # %bb.5: # %entry
-; RV32SFB-NEXT:    mv a1, a5
+; RV32SFB-NEXT:    mv a3, a4
 ; RV32SFB-NEXT:  .LBB20_6: # %entry
 ; RV32SFB-NEXT:    beqz a6, .LBB20_8
 ; RV32SFB-NEXT:  # %bb.7: # %entry
-; RV32SFB-NEXT:    mv a3, a4
+; RV32SFB-NEXT:    mv a1, a5
 ; RV32SFB-NEXT:  .LBB20_8: # %entry
 ; RV32SFB-NEXT:    mv a0, a3
 ; RV32SFB-NEXT:    ret
@@ -874,24 +874,24 @@ define i64 @select_srl(i64 %A, i64 %B, i64 %C, i1 zeroext %cond) {
 ; RV32SFB-NEXT:    not a7, a2
 ; RV32SFB-NEXT:    slli a1, a1, 1
 ; RV32SFB-NEXT:    srl t0, a0, a2
-; RV32SFB-NEXT:    sll a1, a1, a7
 ; RV32SFB-NEXT:    addi a2, a2, -32
+; RV32SFB-NEXT:    sll a1, a1, a7
 ; RV32SFB-NEXT:    mv a0, a3
-; RV32SFB-NEXT:    bgez a2, .LBB21_2
+; RV32SFB-NEXT:    bltz a2, .LBB21_2
 ; RV32SFB-NEXT:  # %bb.1: # %entry
-; RV32SFB-NEXT:    or a0, t0, a1
+; RV32SFB-NEXT:    li a3, 0
 ; RV32SFB-NEXT:  .LBB21_2: # %entry
-; RV32SFB-NEXT:    bltz a2, .LBB21_4
+; RV32SFB-NEXT:    bgez a2, .LBB21_4
 ; RV32SFB-NEXT:  # %bb.3: # %entry
-; RV32SFB-NEXT:    li a3, 0
+; RV32SFB-NEXT:    or a0, t0, a1
 ; RV32SFB-NEXT:  .LBB21_4: # %entry
 ; RV32SFB-NEXT:    beqz a6, .LBB21_6
 ; RV32SFB-NEXT:  # %bb.5: # %entry
-; RV32SFB-NEXT:    mv a0, a4
+; RV32SFB-NEXT:    mv a3, a5
 ; RV32SFB-NEXT:  .LBB21_6: # %entry
 ; RV32SFB-NEXT:    beqz a6, .LBB21_8
 ; RV32SFB-NEXT:  # %bb.7: # %entry
-; RV32SFB-NEXT:    mv a3, a5
+; RV32SFB-NEXT:    mv a0, a4
 ; RV32SFB-NEXT:  .LBB21_8: # %entry
 ; RV32SFB-NEXT:    mv a1, a3
 ; RV32SFB-NEXT:    ret
@@ -935,24 +935,24 @@ define i64 @select_sra(i64 %A, i64 %B, i64 %C, i1 zeroext %cond) {
 ; RV32SFB-NEXT:    not a7, a2
 ; RV32SFB-NEXT:    slli t0, a1, 1
 ; RV32SFB-NEXT:    srl t1, a0, a2
-; RV32SFB-NEXT:    sll a7, t0, a7
 ; RV32SFB-NEXT:    addi a2, a2, -32
+; RV32SFB-NEXT:    sll a7, t0, a7
 ; RV32SFB-NEXT:    mv a0, a3
-; RV32SFB-NEXT:    bgez a2, .LBB22_2
+; RV32SFB-NEXT:    bltz a2, .LBB22_2
 ; RV32SFB-NEXT:  # %bb.1: # %entry
-; RV32SFB-NEXT:    or a0, t1, a7
+; RV32SFB-NEXT:    srai a3, a1, 31
 ; RV32SFB-NEXT:  .LBB22_2: # %entry
-; RV32SFB-NEXT:    bltz a2, .LBB22_4
+; RV32SFB-NEXT:    bgez a2, .LBB22_4
 ; RV32SFB-NEXT:  # %bb.3: # %entry
-; RV32SFB-NEXT:    srai a3, a1, 31
+; RV32SFB-NEXT:    or a0, t1, a7
 ; RV32SFB-NEXT:  .LBB22_4: # %entry
 ; RV32SFB-NEXT:    beqz a6, .LBB22_6
 ; RV32SFB-NEXT:  # %bb.5: # %entry
-; RV32SFB-NEXT:    mv a0, a4
+; RV32SFB-NEXT:    mv a3, a5
 ; RV32SFB-NEXT:  .LBB22_6: # %entry
 ; RV32SFB-NEXT:    beqz a6, .LBB22_8
 ; RV32SFB-NEXT:  # %bb.7: # %entry
-; RV32SFB-NEXT:    mv a3, a5
+; RV32SFB-NEXT:    mv a0, a4
 ; RV32SFB-NEXT:  .LBB22_8: # %entry
 ; RV32SFB-NEXT:    mv a1, a3
 ; RV32SFB-NEXT:    ret
@@ -1088,11 +1088,11 @@ define i64 @select_andi(i64 %A, i64 %C, i1 zeroext %cond) {
 ; RV32SFB-NEXT:  # %bb.1: # %entry
 ; RV32SFB-NEXT:    andi a2, a0, 567
 ; RV32SFB-NEXT:  .LBB25_2: # %entry
+; RV32SFB-NEXT:    mv a0, a2
 ; RV32SFB-NEXT:    bnez a4, .LBB25_4
 ; RV32SFB-NEXT:  # %bb.3: # %entry
 ; RV32SFB-NEXT:    li a1, 0
 ; RV32SFB-NEXT:  .LBB25_4: # %entry
-; RV32SFB-NEXT:    mv a0, a2
 ; RV32SFB-NEXT:    ret
 entry:
  %0 = and i64 %A, 567
@@ -1130,13 +1130,13 @@ define i64 @select_ori(i64 %A, i64 %C, i1 zeroext %cond) {
 ;
 ; RV32SFB-LABEL: select_ori:
 ; RV32SFB:       # %bb.0: # %entry
-; RV32SFB-NEXT:    beqz a4, .LBB26_2
+; RV32SFB-NEXT:    bnez a4, .LBB26_2
 ; RV32SFB-NEXT:  # %bb.1: # %entry
-; RV32SFB-NEXT:    mv a1, a3
+; RV32SFB-NEXT:    ori a2, a0, 890
 ; RV32SFB-NEXT:  .LBB26_2: # %entry
-; RV32SFB-NEXT:    bnez a4, .LBB26_4
+; RV32SFB-NEXT:    beqz a4, .LBB26_4
 ; RV32SFB-NEXT:  # %bb.3: # %entry
-; RV32SFB-NEXT:    ori a2, a0, 890
+; RV32SFB-NEXT:    mv a1, a3
 ; RV32SFB-NEXT:  .LBB26_4: # %entry
 ; RV32SFB-NEXT:    mv a0, a2
 ; RV32SFB-NEXT:    ret
@@ -1176,13 +1176,13 @@ define i64 @select_xori(i64 %A, i64 %C, i1 zeroext %cond) {
 ;
 ; RV32SFB-LABEL: select_xori:
 ; RV32SFB:       # %bb.0: # %entry
-; RV32SFB-NEXT:    beqz a4, .LBB27_2
+; RV32SFB-NEXT:    bnez a4, .LBB27_2
 ; RV32SFB-NEXT:  # %bb.1: # %entry
-; RV32SFB-NEXT:    mv a1, a3
+; RV32SFB-NEXT:    xori a2, a0, 321
 ; RV32SFB-NEXT:  .LBB27_2: # %entry
-; RV32SFB-NEXT:    bnez a4, .LBB27_4
+; RV32SFB-NEXT:    beqz a4, .LBB27_4
 ; RV32SFB-NEXT:  # %bb.3: # %entry
-; RV32SFB-NEXT:    xori a2, a0, 321
+; RV32SFB-NEXT:    mv a1, a3
 ; RV32SFB-NEXT:  .LBB27_4: # %entry
 ; RV32SFB-NEXT:    mv a0, a2
 ; RV32SFB-NEXT:    ret
@@ -1272,11 +1272,11 @@ define i64 @select_srli(i64 %A, i64 %C, i1 zeroext %cond) {
 ; RV32SFB-NEXT:    mv a0, a2
 ; RV32SFB-NEXT:    bnez a4, .LBB29_2
 ; RV32SFB-NEXT:  # %bb.1: # %entry
-; RV32SFB-NEXT:    srli a0, a1, 3
+; RV32SFB-NEXT:    li a3, 0
 ; RV32SFB-NEXT:  .LBB29_2: # %entry
 ; RV32SFB-NEXT:    bnez a4, .LBB29_4
 ; RV32SFB-NEXT:  # %bb.3: # %entry
-; RV32SFB-NEXT:    li a3, 0
+; RV32SFB-NEXT:    srli a0, a1, 3
 ; RV32SFB-NEXT:  .LBB29_4: # %entry
 ; RV32SFB-NEXT:    mv a1, a3
 ; RV32SFB-NEXT:    ret

>From 866b301103d7e1872e40b1bfcc6a2692d4ae85fa Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Thu, 21 Dec 2023 13:17:34 -0800
Subject: [PATCH 2/4] [CodeGen][MISched] dumpSched direction depends on field
 in DAG.

This is a precommit for supporting post-reg-alloc bottom-up scheduling.
We'd like the post-RA scheduling direction to be configurable independently
of the pre-RA direction. This patch changes dumpSchedule to read the
direction from a field on the DAG, because the post-RA and pre-RA directions
will be controlled by different command-line options.
---
 llvm/include/llvm/CodeGen/ScheduleDAGInstrs.h | 12 ++++++++++
 llvm/lib/CodeGen/MachineScheduler.cpp         | 24 ++++++++++++++++---
 2 files changed, 33 insertions(+), 3 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/ScheduleDAGInstrs.h b/llvm/include/llvm/CodeGen/ScheduleDAGInstrs.h
index ef7662a8e7a26a..85de18f5169e5e 100644
--- a/llvm/include/llvm/CodeGen/ScheduleDAGInstrs.h
+++ b/llvm/include/llvm/CodeGen/ScheduleDAGInstrs.h
@@ -191,7 +191,19 @@ namespace llvm {
     /// applicable).
     using SUList = std::list<SUnit *>;
 
+    /// The direction that should be used to dump the scheduled Sequence.
+    enum DumpDirection {
+      TopDown,
+      BottomUp,
+      Bidirectional,
+      NotSet,
+    };
+
+    void setDumpDirection(DumpDirection D) { DumpDir = D; }
+
   protected:
+    DumpDirection DumpDir = NotSet;
+
     /// A map from ValueType to SUList, used during DAG construction, as
     /// a means of remembering which SUs depend on which memory locations.
     class Value2SUsMap;
diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp
index 886137d86f87de..83f16618580cf8 100644
--- a/llvm/lib/CodeGen/MachineScheduler.cpp
+++ b/llvm/lib/CodeGen/MachineScheduler.cpp
@@ -440,6 +440,14 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
   // Instantiate the selected scheduler for this target, function, and
   // optimization level.
   std::unique_ptr<ScheduleDAGInstrs> Scheduler(createMachineScheduler());
+  ScheduleDAGMI::DumpDirection Dir;
+  if (ForceTopDown)
+    Dir = ScheduleDAGMI::DumpDirection::TopDown;
+  else if (ForceBottomUp)
+    Dir = ScheduleDAGMI::DumpDirection::BottomUp;
+  else
+    Dir = ScheduleDAGMI::DumpDirection::Bidirectional;
+  Scheduler->setDumpDirection(Dir);
   scheduleRegions(*Scheduler, false);
 
   LLVM_DEBUG(LIS->dump());
@@ -473,6 +481,9 @@ bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) {
   // Instantiate the selected scheduler for this target, function, and
   // optimization level.
   std::unique_ptr<ScheduleDAGInstrs> Scheduler(createPostMachineScheduler());
+  Scheduler->setDumpDirection(PostRADirection == MISchedPostRASched::TopDown
+                                  ? ScheduleDAGMI::DumpDirection::TopDown
+                                  : ScheduleDAGMI::DumpDirection::BottomUp);
   scheduleRegions(*Scheduler, true);
 
   if (VerifyScheduling)
@@ -1125,12 +1136,14 @@ LLVM_DUMP_METHOD void ScheduleDAGMI::dumpScheduleTraceBottomUp() const {
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 LLVM_DUMP_METHOD void ScheduleDAGMI::dumpSchedule() const {
   if (MISchedDumpScheduleTrace) {
-    if (ForceTopDown)
+    if (DumpDir == TopDown)
       dumpScheduleTraceTopDown();
-    else if (ForceBottomUp)
+    else if (DumpDir == BottomUp)
       dumpScheduleTraceBottomUp();
-    else {
+    else if (DumpDir == BottomUp) {
       dbgs() << "* Schedule table (Bidirectional): not implemented\n";
+    } else {
+      dbgs() << "* Schedule table: DumpDirection not set.\n";
     }
   }
 
@@ -3827,6 +3840,11 @@ void PostGenericScheduler::initialize(ScheduleDAGMI *Dag) {
         DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
             Itin, DAG);
   }
+  if (!Bot.HazardRec) {
+    Bot.HazardRec =
+        DAG->MF.getSubtarget().getInstrInfo()->CreateTargetMIHazardRecognizer(
+            Itin, DAG);
+  }
 }
 
 void PostGenericScheduler::registerRoots() {

>From ccd289f60e19e90e27fea8a9cc742d10bb53b544 Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Thu, 21 Dec 2023 11:36:15 -0800
Subject: [PATCH 3/4] [CodeGen][MISched] Add misched post-regalloc bottom-up
 scheduling

The main justification for this patch is that I'd like to add a
HazardRecognizer that does not work in the top-down direction. This creates a
need for bottom-up scheduling in the PostGenericScheduler. I found that the
PostGenericScheduler would undo the scheduling which was influenced by the
HazardRecognition that happened in the pre-regalloc GenericScheduler.

There is also the possibility that the bottom-up direction will lead to
performance improvements on certain targets, as this is certainly the case for
the pre-regalloc GenericScheduler. This patch will give people the
opportunity to experiment for their sub-targets. However, this patch
keeps the top-down approach as the default for the PostGenericScheduler
since that is what subtargets expect today.
---
 llvm/include/llvm/CodeGen/MachineScheduler.h  |  19 +--
 llvm/lib/CodeGen/MachineScheduler.cpp         | 128 +++++++++++++-----
 llvm/lib/Target/RISCV/RISCVSchedSiFive7.td    |   1 -
 .../RISCV/misched-postra-direction.mir        |  62 +++++++++
 4 files changed, 167 insertions(+), 43 deletions(-)
 create mode 100644 llvm/test/CodeGen/RISCV/misched-postra-direction.mir

diff --git a/llvm/include/llvm/CodeGen/MachineScheduler.h b/llvm/include/llvm/CodeGen/MachineScheduler.h
index 9f16cf5d5bc387..c26466b6455e3b 100644
--- a/llvm/include/llvm/CodeGen/MachineScheduler.h
+++ b/llvm/include/llvm/CodeGen/MachineScheduler.h
@@ -1293,19 +1293,19 @@ class PostGenericScheduler : public GenericSchedulerBase {
 protected:
   ScheduleDAGMI *DAG = nullptr;
   SchedBoundary Top;
-  SmallVector<SUnit*, 8> BotRoots;
+  SchedBoundary Bot;
+  MachineSchedPolicy RegionPolicy;
 
 public:
-  PostGenericScheduler(const MachineSchedContext *C):
-    GenericSchedulerBase(C), Top(SchedBoundary::TopQID, "TopQ") {}
+  PostGenericScheduler(const MachineSchedContext *C)
+      : GenericSchedulerBase(C), Top(SchedBoundary::TopQID, "TopQ"),
+        Bot(SchedBoundary::BotQID, "BotQ") {}
 
   ~PostGenericScheduler() override = default;
 
   void initPolicy(MachineBasicBlock::iterator Begin,
                   MachineBasicBlock::iterator End,
-                  unsigned NumRegionInstrs) override {
-    /* no configurable policy */
-  }
+                  unsigned NumRegionInstrs) override;
 
   /// PostRA scheduling does not track pressure.
   bool shouldTrackPressure() const override { return false; }
@@ -1328,15 +1328,16 @@ class PostGenericScheduler : public GenericSchedulerBase {
     Top.releaseNode(SU, SU->TopReadyCycle, false);
   }
 
-  // Only called for roots.
   void releaseBottomNode(SUnit *SU) override {
-    BotRoots.push_back(SU);
+    if (SU->isScheduled)
+      return;
+    Bot.releaseNode(SU, SU->BotReadyCycle, false);
   }
 
 protected:
   virtual bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand);
 
-  void pickNodeFromQueue(SchedCandidate &Cand);
+  void pickNodeFromQueue(SchedBoundary &Zone, SchedCandidate &Cand);
 };
 
 /// Create the standard converging machine scheduler. This will be used as the
diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp
index 83f16618580cf8..697ad308590f03 100644
--- a/llvm/lib/CodeGen/MachineScheduler.cpp
+++ b/llvm/lib/CodeGen/MachineScheduler.cpp
@@ -81,6 +81,22 @@ cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden,
                            cl::desc("Force top-down list scheduling"));
 cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden,
                             cl::desc("Force bottom-up list scheduling"));
+namespace MISchedPostRASched {
+enum Direction {
+  TopDown,
+  BottomUp,
+};
+} // end namespace MISchedPostRASched
+cl::opt<MISchedPostRASched::Direction> PostRADirection(
+    "misched-postra-direction", cl::Hidden,
+    cl::desc("Post reg-alloc list scheduling direction"),
+    // Default to top-down because it was implemented first and existing targets
+    // expect that behavior by default.
+    cl::init(MISchedPostRASched::TopDown),
+    cl::values(clEnumValN(MISchedPostRASched::TopDown, "topdown",
+                          "Force top-down post reg-alloc list scheduling"),
+               clEnumValN(MISchedPostRASched::BottomUp, "bottomup",
+                          "Force bottom-up post reg-alloc list scheduling")));
 cl::opt<bool>
 DumpCriticalPathLength("misched-dcpl", cl::Hidden,
                        cl::desc("Print critical path length to stdout"));
@@ -440,14 +456,14 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) {
   // Instantiate the selected scheduler for this target, function, and
   // optimization level.
   std::unique_ptr<ScheduleDAGInstrs> Scheduler(createMachineScheduler());
-  ScheduleDAGMI::DumpDirection Dir;
+  ScheduleDAGMI::DumpDirection D;
   if (ForceTopDown)
-    Dir = ScheduleDAGMI::DumpDirection::TopDown;
+    D = ScheduleDAGMI::DumpDirection::TopDown;
   else if (ForceBottomUp)
-    Dir = ScheduleDAGMI::DumpDirection::BottomUp;
+    D = ScheduleDAGMI::DumpDirection::BottomUp;
   else
-    Dir = ScheduleDAGMI::DumpDirection::Bidirectional;
-  Scheduler->setDumpDirection(Dir);
+    D = ScheduleDAGMI::DumpDirection::Bidirectional;
+  Scheduler->setDumpDirection(D);
   scheduleRegions(*Scheduler, false);
 
   LLVM_DEBUG(LIS->dump());
@@ -481,9 +497,12 @@ bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) {
   // Instantiate the selected scheduler for this target, function, and
   // optimization level.
   std::unique_ptr<ScheduleDAGInstrs> Scheduler(createPostMachineScheduler());
-  Scheduler->setDumpDirection(PostRADirection == MISchedPostRASched::TopDown
-                                  ? ScheduleDAGMI::DumpDirection::TopDown
-                                  : ScheduleDAGMI::DumpDirection::BottomUp);
+  ScheduleDAGMI::DumpDirection D;
+  if (PostRADirection == MISchedPostRASched::TopDown)
+    D = ScheduleDAGMI::DumpDirection::TopDown;
+  else
+    D = ScheduleDAGMI::DumpDirection::BottomUp;
+  Scheduler->setDumpDirection(D);
   scheduleRegions(*Scheduler, true);
 
   if (VerifyScheduling)
@@ -1136,11 +1155,11 @@ LLVM_DUMP_METHOD void ScheduleDAGMI::dumpScheduleTraceBottomUp() const {
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 LLVM_DUMP_METHOD void ScheduleDAGMI::dumpSchedule() const {
   if (MISchedDumpScheduleTrace) {
-    if (DumpDir == TopDown)
+    if (DumpDir == DumpDirection::TopDown)
       dumpScheduleTraceTopDown();
-    else if (DumpDir == BottomUp)
+    else if (DumpDir == DumpDirection::BottomUp)
       dumpScheduleTraceBottomUp();
-    else if (DumpDir == BottomUp) {
+    else if (DumpDir == DumpDirection::Bidirectional) {
       dbgs() << "* Schedule table (Bidirectional): not implemented\n";
     } else {
       dbgs() << "* Schedule table: DumpDirection not set.\n";
@@ -3830,7 +3849,7 @@ void PostGenericScheduler::initialize(ScheduleDAGMI *Dag) {
 
   Rem.init(DAG, SchedModel);
   Top.init(DAG, SchedModel, &Rem);
-  BotRoots.clear();
+  Bot.init(DAG, SchedModel, &Rem);
 
   // Initialize the HazardRecognizers. If itineraries don't exist, are empty,
   // or are disabled, then these HazardRecs will be disabled.
@@ -3847,11 +3866,23 @@ void PostGenericScheduler::initialize(ScheduleDAGMI *Dag) {
   }
 }
 
+void PostGenericScheduler::initPolicy(MachineBasicBlock::iterator Begin,
+                                      MachineBasicBlock::iterator End,
+                                      unsigned NumRegionInstrs) {
+  if (PostRADirection == MISchedPostRASched::TopDown) {
+    RegionPolicy.OnlyTopDown = true;
+    RegionPolicy.OnlyBottomUp = false;
+  } else if (PostRADirection == MISchedPostRASched::BottomUp) {
+    RegionPolicy.OnlyTopDown = false;
+    RegionPolicy.OnlyBottomUp = true;
+  }
+}
+
 void PostGenericScheduler::registerRoots() {
   Rem.CriticalPath = DAG->ExitSU.getDepth();
 
   // Some roots may not feed into ExitSU. Check all of them in case.
-  for (const SUnit *SU : BotRoots) {
+  for (const SUnit *SU : Bot.Available) {
     if (SU->getDepth() > Rem.CriticalPath)
       Rem.CriticalPath = SU->getDepth();
   }
@@ -3908,12 +3939,13 @@ bool PostGenericScheduler::tryCandidate(SchedCandidate &Cand,
   return false;
 }
 
-void PostGenericScheduler::pickNodeFromQueue(SchedCandidate &Cand) {
-  ReadyQueue &Q = Top.Available;
+void PostGenericScheduler::pickNodeFromQueue(SchedBoundary &Zone,
+                                             SchedCandidate &Cand) {
+  ReadyQueue &Q = Zone.Available;
   for (SUnit *SU : Q) {
     SchedCandidate TryCand(Cand.Policy);
     TryCand.SU = SU;
-    TryCand.AtTop = true;
+    TryCand.AtTop = Zone.isTop();
     TryCand.initResourceDelta(DAG, SchedModel);
     if (tryCandidate(Cand, TryCand)) {
       Cand.setBest(TryCand);
@@ -3925,29 +3957,54 @@ void PostGenericScheduler::pickNodeFromQueue(SchedCandidate &Cand) {
 /// Pick the next node to schedule.
 SUnit *PostGenericScheduler::pickNode(bool &IsTopNode) {
   if (DAG->top() == DAG->bottom()) {
-    assert(Top.Available.empty() && Top.Pending.empty() && "ReadyQ garbage");
+    assert(Top.Available.empty() && Top.Pending.empty() &&
+           Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
     return nullptr;
   }
   SUnit *SU;
   do {
-    SU = Top.pickOnlyChoice();
-    if (SU) {
-      tracePick(Only1, true);
+    if (RegionPolicy.OnlyBottomUp) {
+      assert(!RegionPolicy.OnlyTopDown);
+      SU = Bot.pickOnlyChoice();
+      if (SU) {
+        tracePick(Only1, true);
+      } else {
+        CandPolicy NoPolicy;
+        SchedCandidate BotCand(NoPolicy);
+        // Set the bottom-up policy based on the state of the current bottom
+        // zone and the instructions outside the zone, including the top zone.
+        setPolicy(BotCand.Policy, /*IsPostRA=*/true, Bot, nullptr);
+        pickNodeFromQueue(Bot, BotCand);
+        assert(BotCand.Reason != NoCand && "failed to find a candidate");
+        tracePick(BotCand);
+        SU = BotCand.SU;
+      }
+      IsTopNode = false;
     } else {
-      CandPolicy NoPolicy;
-      SchedCandidate TopCand(NoPolicy);
-      // Set the top-down policy based on the state of the current top zone and
-      // the instructions outside the zone, including the bottom zone.
-      setPolicy(TopCand.Policy, /*IsPostRA=*/true, Top, nullptr);
-      pickNodeFromQueue(TopCand);
-      assert(TopCand.Reason != NoCand && "failed to find a candidate");
-      tracePick(TopCand);
-      SU = TopCand.SU;
+
+      assert(RegionPolicy.OnlyTopDown);
+      SU = Top.pickOnlyChoice();
+      if (SU) {
+        tracePick(Only1, true);
+      } else {
+        CandPolicy NoPolicy;
+        SchedCandidate TopCand(NoPolicy);
+        // Set the top-down policy based on the state of the current top zone
+        // and the instructions outside the zone, including the bottom zone.
+        setPolicy(TopCand.Policy, /*IsPostRA=*/true, Top, nullptr);
+        pickNodeFromQueue(Top, TopCand);
+        assert(TopCand.Reason != NoCand && "failed to find a candidate");
+        tracePick(TopCand);
+        SU = TopCand.SU;
+      }
+      IsTopNode = true;
     }
   } while (SU->isScheduled);
 
-  IsTopNode = true;
-  Top.removeReady(SU);
+  if (SU->isTopReady())
+    Top.removeReady(SU);
+  if (SU->isBottomReady())
+    Bot.removeReady(SU);
 
   LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
                     << *SU->getInstr());
@@ -3957,8 +4014,13 @@ SUnit *PostGenericScheduler::pickNode(bool &IsTopNode) {
 /// Called after ScheduleDAGMI has scheduled an instruction and updated
 /// scheduled/remaining flags in the DAG nodes.
 void PostGenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
-  SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
-  Top.bumpNode(SU);
+  if (IsTopNode) {
+    SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.getCurrCycle());
+    Top.bumpNode(SU);
+  } else {
+    SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.getCurrCycle());
+    Bot.bumpNode(SU);
+  }
 }
 
 ScheduleDAGMI *llvm::createGenericSchedPostRA(MachineSchedContext *C) {
diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
index 31745341fe1da1..f531ab2fac8f9f 100644
--- a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
@@ -198,7 +198,6 @@ def SiFive7Model : SchedMachineModel {
   let LoadLatency = 3;
   let MispredictPenalty = 3;
   let CompleteModel = 0;
-  let PostRAScheduler = true;
   let EnableIntervals = true;
   let UnsupportedFeatures = [HasStdExtZbkb, HasStdExtZbkc, HasStdExtZbkx,
                              HasStdExtZcmt, HasStdExtZknd, HasStdExtZkne,
diff --git a/llvm/test/CodeGen/RISCV/misched-postra-direction.mir b/llvm/test/CodeGen/RISCV/misched-postra-direction.mir
new file mode 100644
index 00000000000000..aa0e03041a54f8
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/misched-postra-direction.mir
@@ -0,0 +1,62 @@
+# RUN: llc -mtriple=riscv64 -mcpu=sifive-x280 -run-pass=postmisched -enable-post-misched -debug-only=machine-scheduler -misched-dump-schedule-trace -misched-postra-direction=topdown -o - %s 2>&1 | FileCheck --check-prefix=TOPDOWN %s
+# RUN: llc -mtriple=riscv64 -mcpu=sifive-x280 -run-pass=postmisched -enable-post-misched -debug-only=machine-scheduler -misched-dump-schedule-trace -misched-postra-direction=bottomup -o - %s 2>&1 | FileCheck --check-prefix=BOTTOMUP %s
+
+# REQUIRES: asserts
+
+---
+name:            add_m2
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    liveins: $x10, $v8m2, $v10m2, $v12m2, $v14m2
+
+    dead $x0 = PseudoVSETVLI killed renamable $x10, 153 /* e64, m2, tu, ma */, implicit-def $vl, implicit-def $vtype
+    renamable $v8m2 = PseudoVADD_VV_M2 undef renamable $v8m2, killed renamable $v8m2, killed renamable $v10m2, $noreg, 6 /* e64 */, 0 /* tu, mu */, implicit $vl, implicit $vtype, implicit $vl, implicit $vtype
+    renamable $v10m2 = PseudoVADD_VV_M2 undef renamable $v10m2, killed renamable $v14m2, killed renamable $v12m2, $noreg, 6 /* e64 */, 0 /* tu, mu */, implicit $vl, implicit $vtype, implicit $vl, implicit $vtype
+    renamable $v8m2 = PseudoVADD_VV_M2 undef renamable $v8m2, killed renamable $v8m2, killed renamable $v10m2, $noreg, 6 /* e64 */, 0 /* tu, mu */, implicit $vl, implicit $vtype, implicit $vl, implicit $vtype
+    PseudoRET implicit killed $v8m2
+...
+
+# TOPDOWN: *** Final schedule for %bb.0 ***
+# TOPDOWN-NEXT:  * Schedule table (TopDown):
+# TOPDOWN-NEXT:   i: issue
+# TOPDOWN-NEXT:   x: resource booked
+# TOPDOWN-NEXT: Cycle              | 0  | 1  | 2  | 3  | 4  | 5  | 6  | 7  | 8  | 9  | 10 | 11 | 12 |
+# TOPDOWN-NEXT: SU(0)              | i  |    |    |    |    |    |    |    |    |    |    |    |    |
+# TOPDOWN-NEXT:       SiFive7PipeA | x  |    |    |    |    |    |    |    |    |    |    |    |    |
+# TOPDOWN-NEXT:      SiFive7PipeAB | x  |    |    |    |    |    |    |    |    |    |    |    |    |
+# TOPDOWN-NEXT: SU(1)              | i  |    |    |    |    |    |    |    |    |    |    |    |    |
+# TOPDOWN-NEXT:         SiFive7VCQ | x  |    |    |    |    |    |    |    |    |    |    |    |    |
+# TOPDOWN-NEXT:          SiFive7VA |    | x  | x  | x  | x  |    |    |    |    |    |    |    |    |
+# TOPDOWN-NEXT: SU(2)              |    |    |    |    | i  |    |    |    |    |    |    |    |    |
+# TOPDOWN-NEXT:         SiFive7VCQ |    |    |    |    | x  |    |    |    |    |    |    |    |    |
+# TOPDOWN-NEXT:          SiFive7VA |    |    |    |    |    | x  | x  | x  | x  |    |    |    |    |
+# TOPDOWN-NEXT: SU(3)              |    |    |    |    |    |    |    |    | i  |    |    |    |    |
+# TOPDOWN-NEXT:         SiFive7VCQ |    |    |    |    |    |    |    |    | x  |    |    |    |    |
+# TOPDOWN-NEXT:          SiFive7VA |    |    |    |    |    |    |    |    |    | x  | x  | x  | x  |
+# TOPDOWN-NEXT: SU(0):   dead $x0 = PseudoVSETVLI renamable $x10, 153, implicit-def $vl, implicit-def $vtype
+# TOPDOWN-NEXT: SU(1):   renamable $v8m2 = PseudoVADD_VV_M2 undef renamable $v8m2(tied-def 0), renamable $v8m2, renamable $v10m2, $noreg, 6, 0, implicit $vl, implicit $vtype, implicit $vl, implicit $vtype
+# TOPDOWN-NEXT: SU(2):   renamable $v10m2 = PseudoVADD_VV_M2 undef renamable $v10m2(tied-def 0), renamable $v14m2, renamable $v12m2, $noreg, 6, 0, implicit $vl, implicit $vtype, implicit $vl, implicit $vtype
+# TOPDOWN-NEXT: SU(3):   renamable $v8m2 = PseudoVADD_VV_M2 undef renamable $v8m2(tied-def 0), renamable $v8m2, renamable $v10m2, $noreg, 6, 0, implicit $vl, implicit $vtype, implicit $vl, implicit $vtype
+
+# BOTTOMUP: *** Final schedule for %bb.0 ***
+# BOTTOMUP-NEXT:  * Schedule table (BottomUp):
+# BOTTOMUP-NEXT:   i: issue
+# BOTTOMUP-NEXT:   x: resource booked
+# BOTTOMUP-NEXT: Cycle              | 12 | 11 | 10 | 9  | 8  | 7  | 6  | 5  | 4  | 3  | 2  | 1  | 0  |
+# BOTTOMUP-NEXT: SU(0)              | i  |    |    |    |    |    |    |    |    |    |    |    |    |
+# BOTTOMUP-NEXT:       SiFive7PipeA | x  |    |    |    |    |    |    |    |    |    |    |    |    |
+# BOTTOMUP-NEXT:      SiFive7PipeAB | x  |    |    |    |    |    |    |    |    |    |    |    |    |
+# BOTTOMUP-NEXT: SU(1)              | i  |    |    |    |    |    |    |    |    |    |    |    |    |
+# BOTTOMUP-NEXT:         SiFive7VCQ | x  |    |    |    |    |    |    |    |    |    |    |    |    |
+# BOTTOMUP-NEXT:          SiFive7VA |    | x  | x  | x  | x  |    |    |    |    |    |    |    |    |
+# BOTTOMUP-NEXT: SU(2)              |    |    |    |    | i  |    |    |    |    |    |    |    |    |
+# BOTTOMUP-NEXT:         SiFive7VCQ |    |    |    |    | x  |    |    |    |    |    |    |    |    |
+# BOTTOMUP-NEXT:          SiFive7VA |    |    |    |    |    | x  | x  | x  | x  |    |    |    |    |
+# BOTTOMUP-NEXT: SU(3)              |    |    |    |    |    |    |    |    | i  |    |    |    |    |
+# BOTTOMUP-NEXT:         SiFive7VCQ |    |    |    |    |    |    |    |    | x  |    |    |    |    |
+# BOTTOMUP-NEXT:          SiFive7VA |    |    |    |    |    |    |    |    |    | x  | x  | x  | x  |
+# BOTTOMUP-NEXT: SU(0):   dead $x0 = PseudoVSETVLI renamable $x10, 153, implicit-def $vl, implicit-def $vtype
+# BOTTOMUP-NEXT: SU(1):   renamable $v8m2 = PseudoVADD_VV_M2 undef renamable $v8m2(tied-def 0), renamable $v8m2, renamable $v10m2, $noreg, 6, 0, implicit $vl, implicit $vtype, implicit $vl, implicit $vtype
+# BOTTOMUP-NEXT: SU(2):   renamable $v10m2 = PseudoVADD_VV_M2 undef renamable $v10m2(tied-def 0), renamable $v14m2, renamable $v12m2, $noreg, 6, 0, implicit $vl, implicit $vtype, implicit $vl, implicit $vtype
+# BOTTOMUP-NEXT: SU(3):   renamable $v8m2 = PseudoVADD_VV_M2 undef renamable $v8m2(tied-def 0), renamable $v8m2, renamable $v10m2, $noreg, 6, 0, implicit $vl, implicit $vtype, implicit $vl, implicit $vtype

>From bf701527633abd89fb9be68ae96457c104a91adb Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Tue, 16 Jan 2024 10:05:26 -0800
Subject: [PATCH 4/4] !fixup update test checks

---
 llvm/test/CodeGen/RISCV/machine-combiner.ll   |  8 +--
 .../CodeGen/RISCV/short-forward-branch-opt.ll | 64 +++++++++----------
 2 files changed, 36 insertions(+), 36 deletions(-)

diff --git a/llvm/test/CodeGen/RISCV/machine-combiner.ll b/llvm/test/CodeGen/RISCV/machine-combiner.ll
index cfdefec04600c8..7c1792e2f101f5 100644
--- a/llvm/test/CodeGen/RISCV/machine-combiner.ll
+++ b/llvm/test/CodeGen/RISCV/machine-combiner.ll
@@ -1096,10 +1096,10 @@ declare double @llvm.maxnum.f64(double, double)
 define double @test_fmadd_strategy(double %a0, double %a1, double %a2, double %a3, i64 %flag) {
 ; CHECK_LOCAL-LABEL: test_fmadd_strategy:
 ; CHECK_LOCAL:       # %bb.0: # %entry
-; CHECK_LOCAL-NEXT:    fsub.d fa4, fa0, fa1
-; CHECK_LOCAL-NEXT:    andi a0, a0, 1
 ; CHECK_LOCAL-NEXT:    fmv.d fa5, fa0
+; CHECK_LOCAL-NEXT:    fsub.d fa4, fa0, fa1
 ; CHECK_LOCAL-NEXT:    fmul.d fa0, fa4, fa2
+; CHECK_LOCAL-NEXT:    andi a0, a0, 1
 ; CHECK_LOCAL-NEXT:    beqz a0, .LBB76_2
 ; CHECK_LOCAL-NEXT:  # %bb.1: # %entry
 ; CHECK_LOCAL-NEXT:    fmul.d fa4, fa5, fa1
@@ -1110,10 +1110,10 @@ define double @test_fmadd_strategy(double %a0, double %a1, double %a2, double %a
 ;
 ; CHECK_GLOBAL-LABEL: test_fmadd_strategy:
 ; CHECK_GLOBAL:       # %bb.0: # %entry
-; CHECK_GLOBAL-NEXT:    fsub.d fa4, fa0, fa1
-; CHECK_GLOBAL-NEXT:    andi a0, a0, 1
 ; CHECK_GLOBAL-NEXT:    fmv.d fa5, fa0
+; CHECK_GLOBAL-NEXT:    fsub.d fa4, fa0, fa1
 ; CHECK_GLOBAL-NEXT:    fmul.d fa0, fa4, fa2
+; CHECK_GLOBAL-NEXT:    andi a0, a0, 1
 ; CHECK_GLOBAL-NEXT:    beqz a0, .LBB76_2
 ; CHECK_GLOBAL-NEXT:  # %bb.1: # %entry
 ; CHECK_GLOBAL-NEXT:    fmul.d fa5, fa5, fa1
diff --git a/llvm/test/CodeGen/RISCV/short-forward-branch-opt.ll b/llvm/test/CodeGen/RISCV/short-forward-branch-opt.ll
index 59c14ba069195f..725b8fd6eeea6b 100644
--- a/llvm/test/CodeGen/RISCV/short-forward-branch-opt.ll
+++ b/llvm/test/CodeGen/RISCV/short-forward-branch-opt.ll
@@ -813,24 +813,24 @@ define i64 @select_sll(i64 %A, i64 %B, i64 %C, i1 zeroext %cond) {
 ; RV32SFB-NEXT:    not a7, a2
 ; RV32SFB-NEXT:    srli a0, a0, 1
 ; RV32SFB-NEXT:    sll t0, a1, a2
-; RV32SFB-NEXT:    addi a2, a2, -32
 ; RV32SFB-NEXT:    srl a0, a0, a7
+; RV32SFB-NEXT:    addi a2, a2, -32
 ; RV32SFB-NEXT:    mv a1, a3
-; RV32SFB-NEXT:    bltz a2, .LBB20_2
+; RV32SFB-NEXT:    bgez a2, .LBB20_2
 ; RV32SFB-NEXT:  # %bb.1: # %entry
-; RV32SFB-NEXT:    li a3, 0
+; RV32SFB-NEXT:    or a1, t0, a0
 ; RV32SFB-NEXT:  .LBB20_2: # %entry
-; RV32SFB-NEXT:    bgez a2, .LBB20_4
+; RV32SFB-NEXT:    bltz a2, .LBB20_4
 ; RV32SFB-NEXT:  # %bb.3: # %entry
-; RV32SFB-NEXT:    or a1, t0, a0
+; RV32SFB-NEXT:    li a3, 0
 ; RV32SFB-NEXT:  .LBB20_4: # %entry
 ; RV32SFB-NEXT:    beqz a6, .LBB20_6
 ; RV32SFB-NEXT:  # %bb.5: # %entry
-; RV32SFB-NEXT:    mv a3, a4
+; RV32SFB-NEXT:    mv a1, a5
 ; RV32SFB-NEXT:  .LBB20_6: # %entry
 ; RV32SFB-NEXT:    beqz a6, .LBB20_8
 ; RV32SFB-NEXT:  # %bb.7: # %entry
-; RV32SFB-NEXT:    mv a1, a5
+; RV32SFB-NEXT:    mv a3, a4
 ; RV32SFB-NEXT:  .LBB20_8: # %entry
 ; RV32SFB-NEXT:    mv a0, a3
 ; RV32SFB-NEXT:    ret
@@ -874,24 +874,24 @@ define i64 @select_srl(i64 %A, i64 %B, i64 %C, i1 zeroext %cond) {
 ; RV32SFB-NEXT:    not a7, a2
 ; RV32SFB-NEXT:    slli a1, a1, 1
 ; RV32SFB-NEXT:    srl t0, a0, a2
-; RV32SFB-NEXT:    addi a2, a2, -32
 ; RV32SFB-NEXT:    sll a1, a1, a7
+; RV32SFB-NEXT:    addi a2, a2, -32
 ; RV32SFB-NEXT:    mv a0, a3
-; RV32SFB-NEXT:    bltz a2, .LBB21_2
+; RV32SFB-NEXT:    bgez a2, .LBB21_2
 ; RV32SFB-NEXT:  # %bb.1: # %entry
-; RV32SFB-NEXT:    li a3, 0
+; RV32SFB-NEXT:    or a0, t0, a1
 ; RV32SFB-NEXT:  .LBB21_2: # %entry
-; RV32SFB-NEXT:    bgez a2, .LBB21_4
+; RV32SFB-NEXT:    bltz a2, .LBB21_4
 ; RV32SFB-NEXT:  # %bb.3: # %entry
-; RV32SFB-NEXT:    or a0, t0, a1
+; RV32SFB-NEXT:    li a3, 0
 ; RV32SFB-NEXT:  .LBB21_4: # %entry
 ; RV32SFB-NEXT:    beqz a6, .LBB21_6
 ; RV32SFB-NEXT:  # %bb.5: # %entry
-; RV32SFB-NEXT:    mv a3, a5
+; RV32SFB-NEXT:    mv a0, a4
 ; RV32SFB-NEXT:  .LBB21_6: # %entry
 ; RV32SFB-NEXT:    beqz a6, .LBB21_8
 ; RV32SFB-NEXT:  # %bb.7: # %entry
-; RV32SFB-NEXT:    mv a0, a4
+; RV32SFB-NEXT:    mv a3, a5
 ; RV32SFB-NEXT:  .LBB21_8: # %entry
 ; RV32SFB-NEXT:    mv a1, a3
 ; RV32SFB-NEXT:    ret
@@ -935,24 +935,24 @@ define i64 @select_sra(i64 %A, i64 %B, i64 %C, i1 zeroext %cond) {
 ; RV32SFB-NEXT:    not a7, a2
 ; RV32SFB-NEXT:    slli t0, a1, 1
 ; RV32SFB-NEXT:    srl t1, a0, a2
-; RV32SFB-NEXT:    addi a2, a2, -32
 ; RV32SFB-NEXT:    sll a7, t0, a7
+; RV32SFB-NEXT:    addi a2, a2, -32
 ; RV32SFB-NEXT:    mv a0, a3
-; RV32SFB-NEXT:    bltz a2, .LBB22_2
+; RV32SFB-NEXT:    bgez a2, .LBB22_2
 ; RV32SFB-NEXT:  # %bb.1: # %entry
-; RV32SFB-NEXT:    srai a3, a1, 31
+; RV32SFB-NEXT:    or a0, t1, a7
 ; RV32SFB-NEXT:  .LBB22_2: # %entry
-; RV32SFB-NEXT:    bgez a2, .LBB22_4
+; RV32SFB-NEXT:    bltz a2, .LBB22_4
 ; RV32SFB-NEXT:  # %bb.3: # %entry
-; RV32SFB-NEXT:    or a0, t1, a7
+; RV32SFB-NEXT:    srai a3, a1, 31
 ; RV32SFB-NEXT:  .LBB22_4: # %entry
 ; RV32SFB-NEXT:    beqz a6, .LBB22_6
 ; RV32SFB-NEXT:  # %bb.5: # %entry
-; RV32SFB-NEXT:    mv a3, a5
+; RV32SFB-NEXT:    mv a0, a4
 ; RV32SFB-NEXT:  .LBB22_6: # %entry
 ; RV32SFB-NEXT:    beqz a6, .LBB22_8
 ; RV32SFB-NEXT:  # %bb.7: # %entry
-; RV32SFB-NEXT:    mv a0, a4
+; RV32SFB-NEXT:    mv a3, a5
 ; RV32SFB-NEXT:  .LBB22_8: # %entry
 ; RV32SFB-NEXT:    mv a1, a3
 ; RV32SFB-NEXT:    ret
@@ -1088,11 +1088,11 @@ define i64 @select_andi(i64 %A, i64 %C, i1 zeroext %cond) {
 ; RV32SFB-NEXT:  # %bb.1: # %entry
 ; RV32SFB-NEXT:    andi a2, a0, 567
 ; RV32SFB-NEXT:  .LBB25_2: # %entry
-; RV32SFB-NEXT:    mv a0, a2
 ; RV32SFB-NEXT:    bnez a4, .LBB25_4
 ; RV32SFB-NEXT:  # %bb.3: # %entry
 ; RV32SFB-NEXT:    li a1, 0
 ; RV32SFB-NEXT:  .LBB25_4: # %entry
+; RV32SFB-NEXT:    mv a0, a2
 ; RV32SFB-NEXT:    ret
 entry:
  %0 = and i64 %A, 567
@@ -1130,13 +1130,13 @@ define i64 @select_ori(i64 %A, i64 %C, i1 zeroext %cond) {
 ;
 ; RV32SFB-LABEL: select_ori:
 ; RV32SFB:       # %bb.0: # %entry
-; RV32SFB-NEXT:    bnez a4, .LBB26_2
+; RV32SFB-NEXT:    beqz a4, .LBB26_2
 ; RV32SFB-NEXT:  # %bb.1: # %entry
-; RV32SFB-NEXT:    ori a2, a0, 890
+; RV32SFB-NEXT:    mv a1, a3
 ; RV32SFB-NEXT:  .LBB26_2: # %entry
-; RV32SFB-NEXT:    beqz a4, .LBB26_4
+; RV32SFB-NEXT:    bnez a4, .LBB26_4
 ; RV32SFB-NEXT:  # %bb.3: # %entry
-; RV32SFB-NEXT:    mv a1, a3
+; RV32SFB-NEXT:    ori a2, a0, 890
 ; RV32SFB-NEXT:  .LBB26_4: # %entry
 ; RV32SFB-NEXT:    mv a0, a2
 ; RV32SFB-NEXT:    ret
@@ -1176,13 +1176,13 @@ define i64 @select_xori(i64 %A, i64 %C, i1 zeroext %cond) {
 ;
 ; RV32SFB-LABEL: select_xori:
 ; RV32SFB:       # %bb.0: # %entry
-; RV32SFB-NEXT:    bnez a4, .LBB27_2
+; RV32SFB-NEXT:    beqz a4, .LBB27_2
 ; RV32SFB-NEXT:  # %bb.1: # %entry
-; RV32SFB-NEXT:    xori a2, a0, 321
+; RV32SFB-NEXT:    mv a1, a3
 ; RV32SFB-NEXT:  .LBB27_2: # %entry
-; RV32SFB-NEXT:    beqz a4, .LBB27_4
+; RV32SFB-NEXT:    bnez a4, .LBB27_4
 ; RV32SFB-NEXT:  # %bb.3: # %entry
-; RV32SFB-NEXT:    mv a1, a3
+; RV32SFB-NEXT:    xori a2, a0, 321
 ; RV32SFB-NEXT:  .LBB27_4: # %entry
 ; RV32SFB-NEXT:    mv a0, a2
 ; RV32SFB-NEXT:    ret
@@ -1272,11 +1272,11 @@ define i64 @select_srli(i64 %A, i64 %C, i1 zeroext %cond) {
 ; RV32SFB-NEXT:    mv a0, a2
 ; RV32SFB-NEXT:    bnez a4, .LBB29_2
 ; RV32SFB-NEXT:  # %bb.1: # %entry
-; RV32SFB-NEXT:    li a3, 0
+; RV32SFB-NEXT:    srli a0, a1, 3
 ; RV32SFB-NEXT:  .LBB29_2: # %entry
 ; RV32SFB-NEXT:    bnez a4, .LBB29_4
 ; RV32SFB-NEXT:  # %bb.3: # %entry
-; RV32SFB-NEXT:    srli a0, a1, 3
+; RV32SFB-NEXT:    li a3, 0
 ; RV32SFB-NEXT:  .LBB29_4: # %entry
 ; RV32SFB-NEXT:    mv a1, a3
 ; RV32SFB-NEXT:    ret



More information about the llvm-commits mailing list