[llvm] [RISCV] Add tune info for postra scheduling direction (PR #115864)
Pengcheng Wang via llvm-commits
llvm-commits at lists.llvm.org
Mon Dec 9 23:53:53 PST 2024
https://github.com/wangpc-pp updated https://github.com/llvm/llvm-project/pull/115864
>From c3841ef55c65d0090e020e2e8578979f253677ec Mon Sep 17 00:00:00 2001
From: Wang Pengcheng <wangpengcheng.pp at bytedance.com>
Date: Wed, 13 Nov 2024 15:11:18 +0800
Subject: [PATCH] [RISCV] Add tune info for postra scheduling direction
The results differ on different platforms so it is really hard to
determine a common default value.
Tune info for postra scheduling direction is added, and CPUs can
set their own preferred postra scheduling direction.
We set the default value to `bidirectional` as it may be the most
balanced direction.
---
llvm/include/llvm/CodeGen/MachineScheduler.h | 8 +
llvm/lib/CodeGen/MachineScheduler.cpp | 8 +-
llvm/lib/Target/RISCV/RISCVProcessors.td | 13 +-
llvm/lib/Target/RISCV/RISCVSubtarget.cpp | 17 +-
llvm/lib/Target/RISCV/RISCVSubtarget.h | 11 ++
.../RISCV/machine-combiner-strategies.ll | 2 +-
llvm/test/CodeGen/RISCV/machine-combiner.ll | 166 +++++++++---------
.../CodeGen/RISCV/misched-mem-clustering.mir | 8 +-
.../RISCV/rvv/vxrm-insert-out-of-loop.ll | 58 +++---
.../CodeGen/RISCV/short-forward-branch-opt.ll | 116 ++++++------
10 files changed, 223 insertions(+), 184 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/MachineScheduler.h b/llvm/include/llvm/CodeGen/MachineScheduler.h
index 42d132efec2e7c..2b8dd290565c14 100644
--- a/llvm/include/llvm/CodeGen/MachineScheduler.h
+++ b/llvm/include/llvm/CodeGen/MachineScheduler.h
@@ -123,6 +123,14 @@ class TargetInstrInfo;
class TargetPassConfig;
class TargetRegisterInfo;
+namespace MISchedPostRASched {
+enum Direction {
+ TopDown,
+ BottomUp,
+ Bidirectional,
+};
+} // end namespace MISchedPostRASched
+
/// MachineSchedContext provides enough context from the MachineScheduler pass
/// for the target to instantiate a scheduler.
struct MachineSchedContext {
diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp
index 1722bdda99e4af..d81151fa44f741 100644
--- a/llvm/lib/CodeGen/MachineScheduler.cpp
+++ b/llvm/lib/CodeGen/MachineScheduler.cpp
@@ -81,13 +81,7 @@ cl::opt<bool> ForceTopDown("misched-topdown", cl::Hidden,
cl::desc("Force top-down list scheduling"));
cl::opt<bool> ForceBottomUp("misched-bottomup", cl::Hidden,
cl::desc("Force bottom-up list scheduling"));
-namespace MISchedPostRASched {
-enum Direction {
- TopDown,
- BottomUp,
- Bidirectional,
-};
-} // end namespace MISchedPostRASched
+
cl::opt<MISchedPostRASched::Direction> PostRADirection(
"misched-postra-direction", cl::Hidden,
cl::desc("Post reg-alloc list scheduling direction"),
diff --git a/llvm/lib/Target/RISCV/RISCVProcessors.td b/llvm/lib/Target/RISCV/RISCVProcessors.td
index c4e19c515b155b..7ca383f1eb15d8 100644
--- a/llvm/lib/Target/RISCV/RISCVProcessors.td
+++ b/llvm/lib/Target/RISCV/RISCVProcessors.td
@@ -10,6 +10,11 @@
// RISC-V processors supported.
//===----------------------------------------------------------------------===//
+// Predefined scheduling direction.
+defvar TopDown = [{ MISchedPostRASched::TopDown }];
+defvar BottomUp = [{ MISchedPostRASched::BottomUp }];
+defvar Bidirectional = [{ MISchedPostRASched::Bidirectional }];
+
class RISCVTuneInfo {
bits<8> PrefFunctionAlignment = 1;
bits<8> PrefLoopAlignment = 1;
@@ -37,6 +42,11 @@ class RISCVTuneInfo {
bits<32> MaxLoadsPerMemcmpOptSize = 4;
bits<32> MaxLoadsPerMemcmp = 8;
+
+ // The direction of PostRA scheduling.
+ // Do bidirectional scheduling by default since it provides a more balanced
+ // scheduling leading to better performance. This will increase compile time.
+ code PostRASchedDirection = Bidirectional;
}
def RISCVTuneInfoTable : GenericTable {
@@ -49,7 +59,8 @@ def RISCVTuneInfoTable : GenericTable {
"MaxStoresPerMemset", "MaxGluedStoresPerMemcpy",
"MaxStoresPerMemcpyOptSize", "MaxStoresPerMemcpy",
"MaxStoresPerMemmoveOptSize", "MaxStoresPerMemmove",
- "MaxLoadsPerMemcmpOptSize", "MaxLoadsPerMemcmp"];
+ "MaxLoadsPerMemcmpOptSize", "MaxLoadsPerMemcmp",
+ "PostRASchedDirection"];
}
def getRISCVTuneInfo : SearchIndex {
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
index 38443e8646de40..f69bb4ba4e1311 100644
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
@@ -16,7 +16,6 @@
#include "RISCV.h"
#include "RISCVFrameLowering.h"
#include "RISCVTargetMachine.h"
-#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/CodeGen/MacroFusion.h"
#include "llvm/CodeGen/ScheduleDAGMutation.h"
#include "llvm/MC/TargetRegistry.h"
@@ -211,3 +210,19 @@ void RISCVSubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
// register-pressure tracking. This will increase compile time.
Policy.ShouldTrackPressure = true;
}
+
+void RISCVSubtarget::overridePostRASchedPolicy(MachineSchedPolicy &Policy,
+ unsigned NumRegionInstrs) const {
+ MISchedPostRASched::Direction PostRASchedDirection =
+ getPostRASchedDirection();
+ if (PostRASchedDirection == MISchedPostRASched::TopDown) {
+ Policy.OnlyTopDown = true;
+ Policy.OnlyBottomUp = false;
+ } else if (PostRASchedDirection == MISchedPostRASched::BottomUp) {
+ Policy.OnlyTopDown = false;
+ Policy.OnlyBottomUp = true;
+ } else if (PostRASchedDirection == MISchedPostRASched::Bidirectional) {
+ Policy.OnlyTopDown = false;
+ Policy.OnlyBottomUp = false;
+ }
+}
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h
index 5e775d2f87bd94..143dff9e69e27c 100644
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.h
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h
@@ -21,6 +21,7 @@
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
+#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DataLayout.h"
@@ -66,6 +67,9 @@ struct RISCVTuneInfo {
unsigned MaxLoadsPerMemcmpOptSize;
unsigned MaxLoadsPerMemcmp;
+
+ // The direction of PostRA scheduling.
+ MISchedPostRASched::Direction PostRASchedDirection;
};
#define GET_RISCVTuneInfoTable_DECL
@@ -362,8 +366,15 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo {
: TuneInfo->MaxLoadsPerMemcmp;
}
+ MISchedPostRASched::Direction getPostRASchedDirection() const {
+ return TuneInfo->PostRASchedDirection;
+ }
+
void overrideSchedPolicy(MachineSchedPolicy &Policy,
unsigned NumRegionInstrs) const override;
+
+ void overridePostRASchedPolicy(MachineSchedPolicy &Policy,
+ unsigned NumRegionInstrs) const override;
};
} // End llvm namespace
diff --git a/llvm/test/CodeGen/RISCV/machine-combiner-strategies.ll b/llvm/test/CodeGen/RISCV/machine-combiner-strategies.ll
index 7a5d98f5976bf1..c348752cbc7f49 100644
--- a/llvm/test/CodeGen/RISCV/machine-combiner-strategies.ll
+++ b/llvm/test/CodeGen/RISCV/machine-combiner-strategies.ll
@@ -50,8 +50,8 @@ define i32 @test_local_strategy(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32
; CHECK_LOCAL_SIFIVE_U74-NEXT: # %bb.1: # %b2
; CHECK_LOCAL_SIFIVE_U74-NEXT: ret
; CHECK_LOCAL_SIFIVE_U74-NEXT: .LBB0_2: # %b1
-; CHECK_LOCAL_SIFIVE_U74-NEXT: add a3, a3, a4
; CHECK_LOCAL_SIFIVE_U74-NEXT: add a0, a0, a5
+; CHECK_LOCAL_SIFIVE_U74-NEXT: add a3, a3, a4
; CHECK_LOCAL_SIFIVE_U74-NEXT: addw a0, a0, a3
; CHECK_LOCAL_SIFIVE_U74-NEXT: ret
;
diff --git a/llvm/test/CodeGen/RISCV/machine-combiner.ll b/llvm/test/CodeGen/RISCV/machine-combiner.ll
index a18f5d6902dca7..4cee183b33ec7b 100644
--- a/llvm/test/CodeGen/RISCV/machine-combiner.ll
+++ b/llvm/test/CodeGen/RISCV/machine-combiner.ll
@@ -10,8 +10,8 @@
define double @test_reassoc_fadd1(double %a0, double %a1, double %a2, double %a3) {
; CHECK-LABEL: test_reassoc_fadd1:
; CHECK: # %bb.0:
-; CHECK-NEXT: fadd.d fa5, fa2, fa3
; CHECK-NEXT: fadd.d fa4, fa0, fa1
+; CHECK-NEXT: fadd.d fa5, fa2, fa3
; CHECK-NEXT: fadd.d fa0, fa4, fa5
; CHECK-NEXT: ret
%t0 = fadd nsz reassoc double %a0, %a1
@@ -23,8 +23,8 @@ define double @test_reassoc_fadd1(double %a0, double %a1, double %a2, double %a3
define double @test_reassoc_fadd2(double %a0, double %a1, double %a2, double %a3) {
; CHECK-LABEL: test_reassoc_fadd2:
; CHECK: # %bb.0:
-; CHECK-NEXT: fadd.d fa5, fa2, fa3
; CHECK-NEXT: fadd.d fa4, fa0, fa1
+; CHECK-NEXT: fadd.d fa5, fa2, fa3
; CHECK-NEXT: fadd.d fa0, fa5, fa4
; CHECK-NEXT: ret
%t0 = fadd nsz reassoc double %a0, %a1
@@ -36,8 +36,8 @@ define double @test_reassoc_fadd2(double %a0, double %a1, double %a2, double %a3
define double @test_reassoc_fadd3(double %a0, double %a1, double %a2, double %a3) {
; CHECK-LABEL: test_reassoc_fadd3:
; CHECK: # %bb.0:
-; CHECK-NEXT: fadd.d fa5, fa3, fa2
; CHECK-NEXT: fadd.d fa4, fa0, fa1
+; CHECK-NEXT: fadd.d fa5, fa3, fa2
; CHECK-NEXT: fadd.d fa0, fa5, fa4
; CHECK-NEXT: ret
%t0 = fadd nsz reassoc double %a0, %a1
@@ -49,8 +49,8 @@ define double @test_reassoc_fadd3(double %a0, double %a1, double %a2, double %a3
define double @test_reassoc_fadd4(double %a0, double %a1, double %a2, double %a3) {
; CHECK-LABEL: test_reassoc_fadd4:
; CHECK: # %bb.0:
-; CHECK-NEXT: fadd.d fa5, fa3, fa2
; CHECK-NEXT: fadd.d fa4, fa0, fa1
+; CHECK-NEXT: fadd.d fa5, fa3, fa2
; CHECK-NEXT: fadd.d fa0, fa5, fa4
; CHECK-NEXT: ret
%t0 = fadd nsz reassoc double %a0, %a1
@@ -62,8 +62,8 @@ define double @test_reassoc_fadd4(double %a0, double %a1, double %a2, double %a3
define double @test_reassoc_fmul1(double %a0, double %a1, double %a2, double %a3) {
; CHECK-LABEL: test_reassoc_fmul1:
; CHECK: # %bb.0:
-; CHECK-NEXT: fmul.d fa5, fa2, fa3
; CHECK-NEXT: fmul.d fa4, fa0, fa1
+; CHECK-NEXT: fmul.d fa5, fa2, fa3
; CHECK-NEXT: fmul.d fa0, fa4, fa5
; CHECK-NEXT: ret
%t0 = fmul nsz reassoc double %a0, %a1
@@ -75,8 +75,8 @@ define double @test_reassoc_fmul1(double %a0, double %a1, double %a2, double %a3
define double @test_reassoc_fmul2(double %a0, double %a1, double %a2, double %a3) {
; CHECK-LABEL: test_reassoc_fmul2:
; CHECK: # %bb.0:
-; CHECK-NEXT: fmul.d fa5, fa2, fa3
; CHECK-NEXT: fmul.d fa4, fa0, fa1
+; CHECK-NEXT: fmul.d fa5, fa2, fa3
; CHECK-NEXT: fmul.d fa0, fa5, fa4
; CHECK-NEXT: ret
%t0 = fmul nsz reassoc double %a0, %a1
@@ -88,8 +88,8 @@ define double @test_reassoc_fmul2(double %a0, double %a1, double %a2, double %a3
define double @test_reassoc_fmul3(double %a0, double %a1, double %a2, double %a3) {
; CHECK-LABEL: test_reassoc_fmul3:
; CHECK: # %bb.0:
-; CHECK-NEXT: fmul.d fa5, fa3, fa2
; CHECK-NEXT: fmul.d fa4, fa0, fa1
+; CHECK-NEXT: fmul.d fa5, fa3, fa2
; CHECK-NEXT: fmul.d fa0, fa5, fa4
; CHECK-NEXT: ret
%t0 = fmul nsz reassoc double %a0, %a1
@@ -101,8 +101,8 @@ define double @test_reassoc_fmul3(double %a0, double %a1, double %a2, double %a3
define double @test_reassoc_fmul4(double %a0, double %a1, double %a2, double %a3) {
; CHECK-LABEL: test_reassoc_fmul4:
; CHECK: # %bb.0:
-; CHECK-NEXT: fmul.d fa5, fa3, fa2
; CHECK-NEXT: fmul.d fa4, fa0, fa1
+; CHECK-NEXT: fmul.d fa5, fa3, fa2
; CHECK-NEXT: fmul.d fa0, fa5, fa4
; CHECK-NEXT: ret
%t0 = fmul nsz reassoc double %a0, %a1
@@ -133,10 +133,10 @@ define double @test_reassoc_big1(double %a0, double %a1, double %a2, double %a3,
define double @test_reassoc_big2(double %a0, double %a1, i32 %a2, double %a3, i32 %a4, double %a5) {
; CHECK-LABEL: test_reassoc_big2:
; CHECK: # %bb.0:
+; CHECK-NEXT: fcvt.d.w fa5, a1
; CHECK-NEXT: fsub.d fa4, fa3, fa2
; CHECK-NEXT: fadd.d fa3, fa0, fa1
; CHECK-NEXT: fadd.d ft0, fa2, fa1
-; CHECK-NEXT: fcvt.d.w fa5, a1
; CHECK-NEXT: fcvt.d.w ft1, a0
; CHECK-NEXT: fmul.d fa5, fa5, fa1
; CHECK-NEXT: fmul.d fa2, fa2, ft1
@@ -192,8 +192,8 @@ define double @test_reassoc_fadd_flags_2(double %a0, double %a1, double %a2, dou
define double @test_fmadd1(double %a0, double %a1, double %a2, double %a3) {
; CHECK-LABEL: test_fmadd1:
; CHECK: # %bb.0:
-; CHECK-NEXT: fmadd.d fa5, fa0, fa1, fa2
; CHECK-NEXT: fmadd.d fa4, fa0, fa1, fa3
+; CHECK-NEXT: fmadd.d fa5, fa0, fa1, fa2
; CHECK-NEXT: fadd.d fa0, fa5, fa4
; CHECK-NEXT: ret
%t0 = fmul contract double %a0, %a1
@@ -206,8 +206,8 @@ define double @test_fmadd1(double %a0, double %a1, double %a2, double %a3) {
define double @test_fmadd2(double %a0, double %a1, double %a2) {
; CHECK-LABEL: test_fmadd2:
; CHECK: # %bb.0:
-; CHECK-NEXT: fmul.d fa5, fa0, fa1
; CHECK-NEXT: fmadd.d fa4, fa0, fa1, fa2
+; CHECK-NEXT: fmul.d fa5, fa0, fa1
; CHECK-NEXT: fdiv.d fa0, fa4, fa5
; CHECK-NEXT: ret
%t0 = fmul contract double %a0, %a1
@@ -219,8 +219,8 @@ define double @test_fmadd2(double %a0, double %a1, double %a2) {
define double @test_fmsub(double %a0, double %a1, double %a2) {
; CHECK-LABEL: test_fmsub:
; CHECK: # %bb.0:
-; CHECK-NEXT: fmul.d fa5, fa0, fa1
; CHECK-NEXT: fmsub.d fa4, fa0, fa1, fa2
+; CHECK-NEXT: fmul.d fa5, fa0, fa1
; CHECK-NEXT: fdiv.d fa0, fa4, fa5
; CHECK-NEXT: ret
%t0 = fmul contract double %a0, %a1
@@ -232,8 +232,8 @@ define double @test_fmsub(double %a0, double %a1, double %a2) {
define double @test_fnmsub(double %a0, double %a1, double %a2) {
; CHECK-LABEL: test_fnmsub:
; CHECK: # %bb.0:
-; CHECK-NEXT: fmul.d fa5, fa0, fa1
; CHECK-NEXT: fnmsub.d fa4, fa0, fa1, fa2
+; CHECK-NEXT: fmul.d fa5, fa0, fa1
; CHECK-NEXT: fdiv.d fa0, fa4, fa5
; CHECK-NEXT: ret
%t0 = fmul contract double %a0, %a1
@@ -245,8 +245,8 @@ define double @test_fnmsub(double %a0, double %a1, double %a2) {
define double @test_reassoc_fsub1(double %a0, double %a1, double %a2, double %a3) {
; CHECK-LABEL: test_reassoc_fsub1:
; CHECK: # %bb.0:
-; CHECK-NEXT: fsub.d fa5, fa2, fa3
; CHECK-NEXT: fadd.d fa4, fa0, fa1
+; CHECK-NEXT: fsub.d fa5, fa2, fa3
; CHECK-NEXT: fadd.d fa0, fa4, fa5
; CHECK-NEXT: ret
%t0 = fadd nsz reassoc double %a0, %a1
@@ -258,8 +258,8 @@ define double @test_reassoc_fsub1(double %a0, double %a1, double %a2, double %a3
define double @test_reassoc_fsub2(double %a0, double %a1, double %a2, double %a3) {
; CHECK-LABEL: test_reassoc_fsub2:
; CHECK: # %bb.0:
-; CHECK-NEXT: fsub.d fa5, fa2, fa3
; CHECK-NEXT: fadd.d fa4, fa0, fa1
+; CHECK-NEXT: fsub.d fa5, fa2, fa3
; CHECK-NEXT: fsub.d fa0, fa4, fa5
; CHECK-NEXT: ret
%t0 = fadd nsz reassoc double %a0, %a1
@@ -271,8 +271,8 @@ define double @test_reassoc_fsub2(double %a0, double %a1, double %a2, double %a3
define double @test_reassoc_fsub3(double %a0, double %a1, double %a2, double %a3) {
; CHECK-LABEL: test_reassoc_fsub3:
; CHECK: # %bb.0:
-; CHECK-NEXT: fadd.d fa5, fa2, fa3
; CHECK-NEXT: fadd.d fa4, fa0, fa1
+; CHECK-NEXT: fadd.d fa5, fa2, fa3
; CHECK-NEXT: fsub.d fa0, fa4, fa5
; CHECK-NEXT: ret
%t0 = fadd nsz reassoc double %a0, %a1
@@ -284,8 +284,8 @@ define double @test_reassoc_fsub3(double %a0, double %a1, double %a2, double %a3
define double @test_reassoc_fsub4(double %a0, double %a1, double %a2, double %a3) {
; CHECK-LABEL: test_reassoc_fsub4:
; CHECK: # %bb.0:
-; CHECK-NEXT: fsub.d fa5, fa2, fa3
; CHECK-NEXT: fadd.d fa4, fa0, fa1
+; CHECK-NEXT: fsub.d fa5, fa2, fa3
; CHECK-NEXT: fadd.d fa0, fa5, fa4
; CHECK-NEXT: ret
%t0 = fadd nsz reassoc double %a0, %a1
@@ -297,8 +297,8 @@ define double @test_reassoc_fsub4(double %a0, double %a1, double %a2, double %a3
define double @test_reassoc_fsub5(double %a0, double %a1, double %a2, double %a3) {
; CHECK-LABEL: test_reassoc_fsub5:
; CHECK: # %bb.0:
-; CHECK-NEXT: fadd.d fa5, fa2, fa3
; CHECK-NEXT: fadd.d fa4, fa0, fa1
+; CHECK-NEXT: fadd.d fa5, fa2, fa3
; CHECK-NEXT: fsub.d fa0, fa5, fa4
; CHECK-NEXT: ret
%t0 = fadd nsz reassoc double %a0, %a1
@@ -310,8 +310,8 @@ define double @test_reassoc_fsub5(double %a0, double %a1, double %a2, double %a3
define double @test_reassoc_fsub6(double %a0, double %a1, double %a2, double %a3) {
; CHECK-LABEL: test_reassoc_fsub6:
; CHECK: # %bb.0:
-; CHECK-NEXT: fsub.d fa5, fa2, fa3
; CHECK-NEXT: fadd.d fa4, fa0, fa1
+; CHECK-NEXT: fsub.d fa5, fa2, fa3
; CHECK-NEXT: fsub.d fa0, fa5, fa4
; CHECK-NEXT: ret
%t0 = fadd nsz reassoc double %a0, %a1
@@ -323,8 +323,8 @@ define double @test_reassoc_fsub6(double %a0, double %a1, double %a2, double %a3
define double @test_reassoc_fsub7(double %a0, double %a1, double %a2, double %a3) {
; CHECK-LABEL: test_reassoc_fsub7:
; CHECK: # %bb.0:
-; CHECK-NEXT: fsub.d fa5, fa3, fa2
; CHECK-NEXT: fadd.d fa4, fa0, fa1
+; CHECK-NEXT: fsub.d fa5, fa3, fa2
; CHECK-NEXT: fsub.d fa0, fa5, fa4
; CHECK-NEXT: ret
%t0 = fadd nsz reassoc double %a0, %a1
@@ -336,8 +336,8 @@ define double @test_reassoc_fsub7(double %a0, double %a1, double %a2, double %a3
define double @test_reassoc_fsub8(double %a0, double %a1, double %a2, double %a3) {
; CHECK-LABEL: test_reassoc_fsub8:
; CHECK: # %bb.0:
-; CHECK-NEXT: fsub.d fa5, fa3, fa2
; CHECK-NEXT: fadd.d fa4, fa0, fa1
+; CHECK-NEXT: fsub.d fa5, fa3, fa2
; CHECK-NEXT: fadd.d fa0, fa5, fa4
; CHECK-NEXT: ret
%t0 = fadd nsz reassoc double %a0, %a1
@@ -349,8 +349,8 @@ define double @test_reassoc_fsub8(double %a0, double %a1, double %a2, double %a3
define double @test_reassoc_fsub9(double %a0, double %a1, double %a2, double %a3) {
; CHECK-LABEL: test_reassoc_fsub9:
; CHECK: # %bb.0:
-; CHECK-NEXT: fadd.d fa5, fa3, fa2
; CHECK-NEXT: fadd.d fa4, fa0, fa1
+; CHECK-NEXT: fadd.d fa5, fa3, fa2
; CHECK-NEXT: fsub.d fa0, fa5, fa4
; CHECK-NEXT: ret
%t0 = fadd nsz reassoc double %a0, %a1
@@ -362,8 +362,8 @@ define double @test_reassoc_fsub9(double %a0, double %a1, double %a2, double %a3
define double @test_reassoc_fsub10(double %a0, double %a1, double %a2, double %a3) {
; CHECK-LABEL: test_reassoc_fsub10:
; CHECK: # %bb.0:
-; CHECK-NEXT: fsub.d fa5, fa3, fa2
; CHECK-NEXT: fadd.d fa4, fa0, fa1
+; CHECK-NEXT: fsub.d fa5, fa3, fa2
; CHECK-NEXT: fsub.d fa0, fa5, fa4
; CHECK-NEXT: ret
%t0 = fadd nsz reassoc double %a0, %a1
@@ -375,8 +375,8 @@ define double @test_reassoc_fsub10(double %a0, double %a1, double %a2, double %a
define double @test_reassoc_fsub11(double %a0, double %a1, double %a2, double %a3) {
; CHECK-LABEL: test_reassoc_fsub11:
; CHECK: # %bb.0:
-; CHECK-NEXT: fadd.d fa5, fa3, fa2
; CHECK-NEXT: fadd.d fa4, fa0, fa1
+; CHECK-NEXT: fadd.d fa5, fa3, fa2
; CHECK-NEXT: fsub.d fa0, fa5, fa4
; CHECK-NEXT: ret
%t0 = fadd nsz reassoc double %a0, %a1
@@ -388,8 +388,8 @@ define double @test_reassoc_fsub11(double %a0, double %a1, double %a2, double %a
define double @test_reassoc_fsub12(double %a0, double %a1, double %a2, double %a3) {
; CHECK-LABEL: test_reassoc_fsub12:
; CHECK: # %bb.0:
-; CHECK-NEXT: fsub.d fa5, fa3, fa2
; CHECK-NEXT: fadd.d fa4, fa0, fa1
+; CHECK-NEXT: fsub.d fa5, fa3, fa2
; CHECK-NEXT: fadd.d fa0, fa5, fa4
; CHECK-NEXT: ret
%t0 = fadd nsz reassoc double %a0, %a1
@@ -401,8 +401,8 @@ define double @test_reassoc_fsub12(double %a0, double %a1, double %a2, double %a
define i8 @test_reassoc_add_i8(i8 %a0, i8 %a1, i8 %a2, i8 %a3) {
; CHECK-LABEL: test_reassoc_add_i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: add a0, a0, a1
; CHECK-NEXT: add a2, a2, a3
+; CHECK-NEXT: add a0, a0, a1
; CHECK-NEXT: add a0, a0, a2
; CHECK-NEXT: ret
%t0 = add i8 %a0, %a1
@@ -414,8 +414,8 @@ define i8 @test_reassoc_add_i8(i8 %a0, i8 %a1, i8 %a2, i8 %a3) {
define i16 @test_reassoc_add_i16(i16 %a0, i16 %a1, i16 %a2, i16 %a3) {
; CHECK-LABEL: test_reassoc_add_i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: add a0, a0, a1
; CHECK-NEXT: add a2, a2, a3
+; CHECK-NEXT: add a0, a0, a1
; CHECK-NEXT: add a0, a0, a2
; CHECK-NEXT: ret
%t0 = add i16 %a0, %a1
@@ -427,8 +427,8 @@ define i16 @test_reassoc_add_i16(i16 %a0, i16 %a1, i16 %a2, i16 %a3) {
define i32 @test_reassoc_add_i32(i32 %a0, i32 %a1, i32 %a2, i32 %a3) {
; CHECK-LABEL: test_reassoc_add_i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: add a0, a0, a1
; CHECK-NEXT: add a2, a2, a3
+; CHECK-NEXT: add a0, a0, a1
; CHECK-NEXT: addw a0, a0, a2
; CHECK-NEXT: ret
%t0 = add i32 %a0, %a1
@@ -440,8 +440,8 @@ define i32 @test_reassoc_add_i32(i32 %a0, i32 %a1, i32 %a2, i32 %a3) {
define i64 @test_reassoc_add_i64(i64 %a0, i64 %a1, i64 %a2, i64 %a3) {
; CHECK-LABEL: test_reassoc_add_i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: add a0, a0, a1
; CHECK-NEXT: add a2, a2, a3
+; CHECK-NEXT: add a0, a0, a1
; CHECK-NEXT: add a0, a0, a2
; CHECK-NEXT: ret
%t0 = add i64 %a0, %a1
@@ -453,8 +453,8 @@ define i64 @test_reassoc_add_i64(i64 %a0, i64 %a1, i64 %a2, i64 %a3) {
define i32 @test_reassoc_add_sub_i32_1(i32 %a0, i32 %a1, i32 %a2, i32 %a3) {
; CHECK-LABEL: test_reassoc_add_sub_i32_1:
; CHECK: # %bb.0:
-; CHECK-NEXT: add a0, a0, a1
; CHECK-NEXT: subw a2, a2, a3
+; CHECK-NEXT: add a0, a0, a1
; CHECK-NEXT: subw a0, a0, a2
; CHECK-NEXT: ret
%t0 = add i32 %a0, %a1
@@ -466,8 +466,8 @@ define i32 @test_reassoc_add_sub_i32_1(i32 %a0, i32 %a1, i32 %a2, i32 %a3) {
define i32 @test_reassoc_add_sub_i32_2(i32 %a0, i32 %a1, i32 %a2, i32 %a3) {
; CHECK-LABEL: test_reassoc_add_sub_i32_2:
; CHECK: # %bb.0:
-; CHECK-NEXT: add a0, a0, a1
; CHECK-NEXT: subw a2, a2, a3
+; CHECK-NEXT: add a0, a0, a1
; CHECK-NEXT: addw a0, a0, a2
; CHECK-NEXT: ret
%t0 = add i32 %a0, %a1
@@ -479,8 +479,8 @@ define i32 @test_reassoc_add_sub_i32_2(i32 %a0, i32 %a1, i32 %a2, i32 %a3) {
define i32 @test_reassoc_add_sub_i32_3(i32 %a0, i32 %a1, i32 %a2, i32 %a3) {
; CHECK-LABEL: test_reassoc_add_sub_i32_3:
; CHECK: # %bb.0:
-; CHECK-NEXT: add a0, a0, a1
; CHECK-NEXT: add a2, a2, a3
+; CHECK-NEXT: add a0, a0, a1
; CHECK-NEXT: subw a0, a0, a2
; CHECK-NEXT: ret
%t0 = add i32 %a0, %a1
@@ -492,8 +492,8 @@ define i32 @test_reassoc_add_sub_i32_3(i32 %a0, i32 %a1, i32 %a2, i32 %a3) {
define i64 @test_reassoc_add_sub_i64_1(i64 %a0, i64 %a1, i64 %a2, i64 %a3) {
; CHECK-LABEL: test_reassoc_add_sub_i64_1:
; CHECK: # %bb.0:
-; CHECK-NEXT: add a0, a0, a1
; CHECK-NEXT: sub a2, a2, a3
+; CHECK-NEXT: add a0, a0, a1
; CHECK-NEXT: sub a0, a0, a2
; CHECK-NEXT: ret
%t0 = add i64 %a0, %a1
@@ -505,8 +505,8 @@ define i64 @test_reassoc_add_sub_i64_1(i64 %a0, i64 %a1, i64 %a2, i64 %a3) {
define i64 @test_reassoc_add_sub_i64_2(i64 %a0, i64 %a1, i64 %a2, i64 %a3) {
; CHECK-LABEL: test_reassoc_add_sub_i64_2:
; CHECK: # %bb.0:
-; CHECK-NEXT: add a0, a0, a1
; CHECK-NEXT: sub a2, a2, a3
+; CHECK-NEXT: add a0, a0, a1
; CHECK-NEXT: add a0, a0, a2
; CHECK-NEXT: ret
%t0 = add i64 %a0, %a1
@@ -518,8 +518,8 @@ define i64 @test_reassoc_add_sub_i64_2(i64 %a0, i64 %a1, i64 %a2, i64 %a3) {
define i64 @test_reassoc_add_sub_i64_3(i64 %a0, i64 %a1, i64 %a2, i64 %a3) {
; CHECK-LABEL: test_reassoc_add_sub_i64_3:
; CHECK: # %bb.0:
-; CHECK-NEXT: add a0, a0, a1
; CHECK-NEXT: add a2, a2, a3
+; CHECK-NEXT: add a0, a0, a1
; CHECK-NEXT: sub a0, a0, a2
; CHECK-NEXT: ret
%t0 = add i64 %a0, %a1
@@ -531,8 +531,8 @@ define i64 @test_reassoc_add_sub_i64_3(i64 %a0, i64 %a1, i64 %a2, i64 %a3) {
define i8 @test_reassoc_and_i8(i8 %a0, i8 %a1, i8 %a2, i8 %a3) {
; CHECK-LABEL: test_reassoc_and_i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: and a0, a0, a1
; CHECK-NEXT: and a2, a2, a3
+; CHECK-NEXT: and a0, a0, a1
; CHECK-NEXT: and a0, a0, a2
; CHECK-NEXT: ret
%t0 = and i8 %a0, %a1
@@ -544,8 +544,8 @@ define i8 @test_reassoc_and_i8(i8 %a0, i8 %a1, i8 %a2, i8 %a3) {
define i16 @test_reassoc_and_i16(i16 %a0, i16 %a1, i16 %a2, i16 %a3) {
; CHECK-LABEL: test_reassoc_and_i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: and a0, a0, a1
; CHECK-NEXT: and a2, a2, a3
+; CHECK-NEXT: and a0, a0, a1
; CHECK-NEXT: and a0, a0, a2
; CHECK-NEXT: ret
%t0 = and i16 %a0, %a1
@@ -557,8 +557,8 @@ define i16 @test_reassoc_and_i16(i16 %a0, i16 %a1, i16 %a2, i16 %a3) {
define i32 @test_reassoc_and_i32(i32 %a0, i32 %a1, i32 %a2, i32 %a3) {
; CHECK-LABEL: test_reassoc_and_i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: and a0, a0, a1
; CHECK-NEXT: and a2, a2, a3
+; CHECK-NEXT: and a0, a0, a1
; CHECK-NEXT: and a0, a0, a2
; CHECK-NEXT: ret
%t0 = and i32 %a0, %a1
@@ -570,8 +570,8 @@ define i32 @test_reassoc_and_i32(i32 %a0, i32 %a1, i32 %a2, i32 %a3) {
define i64 @test_reassoc_and_i64(i64 %a0, i64 %a1, i64 %a2, i64 %a3) {
; CHECK-LABEL: test_reassoc_and_i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: and a0, a0, a1
; CHECK-NEXT: and a2, a2, a3
+; CHECK-NEXT: and a0, a0, a1
; CHECK-NEXT: and a0, a0, a2
; CHECK-NEXT: ret
%t0 = and i64 %a0, %a1
@@ -583,8 +583,8 @@ define i64 @test_reassoc_and_i64(i64 %a0, i64 %a1, i64 %a2, i64 %a3) {
define i8 @test_reassoc_or_i8(i8 %a0, i8 %a1, i8 %a2, i8 %a3) {
; CHECK-LABEL: test_reassoc_or_i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: or a0, a0, a1
; CHECK-NEXT: or a2, a2, a3
+; CHECK-NEXT: or a0, a0, a1
; CHECK-NEXT: or a0, a0, a2
; CHECK-NEXT: ret
%t0 = or i8 %a0, %a1
@@ -596,8 +596,8 @@ define i8 @test_reassoc_or_i8(i8 %a0, i8 %a1, i8 %a2, i8 %a3) {
define i16 @test_reassoc_or_i16(i16 %a0, i16 %a1, i16 %a2, i16 %a3) {
; CHECK-LABEL: test_reassoc_or_i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: or a0, a0, a1
; CHECK-NEXT: or a2, a2, a3
+; CHECK-NEXT: or a0, a0, a1
; CHECK-NEXT: or a0, a0, a2
; CHECK-NEXT: ret
%t0 = or i16 %a0, %a1
@@ -609,8 +609,8 @@ define i16 @test_reassoc_or_i16(i16 %a0, i16 %a1, i16 %a2, i16 %a3) {
define i32 @test_reassoc_or_i32(i32 %a0, i32 %a1, i32 %a2, i32 %a3) {
; CHECK-LABEL: test_reassoc_or_i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: or a0, a0, a1
; CHECK-NEXT: or a2, a2, a3
+; CHECK-NEXT: or a0, a0, a1
; CHECK-NEXT: or a0, a0, a2
; CHECK-NEXT: ret
%t0 = or i32 %a0, %a1
@@ -622,8 +622,8 @@ define i32 @test_reassoc_or_i32(i32 %a0, i32 %a1, i32 %a2, i32 %a3) {
define i64 @test_reassoc_or_i64(i64 %a0, i64 %a1, i64 %a2, i64 %a3) {
; CHECK-LABEL: test_reassoc_or_i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: or a0, a0, a1
; CHECK-NEXT: or a2, a2, a3
+; CHECK-NEXT: or a0, a0, a1
; CHECK-NEXT: or a0, a0, a2
; CHECK-NEXT: ret
%t0 = or i64 %a0, %a1
@@ -635,8 +635,8 @@ define i64 @test_reassoc_or_i64(i64 %a0, i64 %a1, i64 %a2, i64 %a3) {
define i8 @test_reassoc_xor_i8(i8 %a0, i8 %a1, i8 %a2, i8 %a3) {
; CHECK-LABEL: test_reassoc_xor_i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: xor a0, a0, a1
; CHECK-NEXT: xor a2, a2, a3
+; CHECK-NEXT: xor a0, a0, a1
; CHECK-NEXT: xor a0, a0, a2
; CHECK-NEXT: ret
%t0 = xor i8 %a0, %a1
@@ -648,8 +648,8 @@ define i8 @test_reassoc_xor_i8(i8 %a0, i8 %a1, i8 %a2, i8 %a3) {
define i16 @test_reassoc_xor_i16(i16 %a0, i16 %a1, i16 %a2, i16 %a3) {
; CHECK-LABEL: test_reassoc_xor_i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: xor a0, a0, a1
; CHECK-NEXT: xor a2, a2, a3
+; CHECK-NEXT: xor a0, a0, a1
; CHECK-NEXT: xor a0, a0, a2
; CHECK-NEXT: ret
%t0 = xor i16 %a0, %a1
@@ -661,8 +661,8 @@ define i16 @test_reassoc_xor_i16(i16 %a0, i16 %a1, i16 %a2, i16 %a3) {
define i32 @test_reassoc_xor_i32(i32 %a0, i32 %a1, i32 %a2, i32 %a3) {
; CHECK-LABEL: test_reassoc_xor_i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: xor a0, a0, a1
; CHECK-NEXT: xor a2, a2, a3
+; CHECK-NEXT: xor a0, a0, a1
; CHECK-NEXT: xor a0, a0, a2
; CHECK-NEXT: ret
%t0 = xor i32 %a0, %a1
@@ -674,8 +674,8 @@ define i32 @test_reassoc_xor_i32(i32 %a0, i32 %a1, i32 %a2, i32 %a3) {
define i64 @test_reassoc_xor_i64(i64 %a0, i64 %a1, i64 %a2, i64 %a3) {
; CHECK-LABEL: test_reassoc_xor_i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: xor a0, a0, a1
; CHECK-NEXT: xor a2, a2, a3
+; CHECK-NEXT: xor a0, a0, a1
; CHECK-NEXT: xor a0, a0, a2
; CHECK-NEXT: ret
%t0 = xor i64 %a0, %a1
@@ -687,8 +687,8 @@ define i64 @test_reassoc_xor_i64(i64 %a0, i64 %a1, i64 %a2, i64 %a3) {
define i8 @test_reassoc_mul_i8(i8 %a0, i8 %a1, i8 %a2, i8 %a3) {
; CHECK-LABEL: test_reassoc_mul_i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: mul a2, a2, a3
; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: mul a2, a2, a3
; CHECK-NEXT: mul a0, a0, a2
; CHECK-NEXT: ret
%t0 = mul i8 %a0, %a1
@@ -700,8 +700,8 @@ define i8 @test_reassoc_mul_i8(i8 %a0, i8 %a1, i8 %a2, i8 %a3) {
define i16 @test_reassoc_mul_i16(i16 %a0, i16 %a1, i16 %a2, i16 %a3) {
; CHECK-LABEL: test_reassoc_mul_i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: mul a2, a2, a3
; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: mul a2, a2, a3
; CHECK-NEXT: mul a0, a0, a2
; CHECK-NEXT: ret
%t0 = mul i16 %a0, %a1
@@ -713,8 +713,8 @@ define i16 @test_reassoc_mul_i16(i16 %a0, i16 %a1, i16 %a2, i16 %a3) {
define i32 @test_reassoc_mul_i32(i32 %a0, i32 %a1, i32 %a2, i32 %a3) {
; CHECK-LABEL: test_reassoc_mul_i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: mul a2, a2, a3
; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: mul a2, a2, a3
; CHECK-NEXT: mulw a0, a0, a2
; CHECK-NEXT: ret
%t0 = mul i32 %a0, %a1
@@ -726,8 +726,8 @@ define i32 @test_reassoc_mul_i32(i32 %a0, i32 %a1, i32 %a2, i32 %a3) {
define i64 @test_reassoc_mul_i64(i64 %a0, i64 %a1, i64 %a2, i64 %a3) {
; CHECK-LABEL: test_reassoc_mul_i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: mul a2, a2, a3
; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: mul a2, a2, a3
; CHECK-NEXT: mul a0, a0, a2
; CHECK-NEXT: ret
%t0 = mul i64 %a0, %a1
@@ -739,11 +739,11 @@ define i64 @test_reassoc_mul_i64(i64 %a0, i64 %a1, i64 %a2, i64 %a3) {
define i8 @test_reassoc_minu_i8(i8 %a0, i8 %a1, i8 %a2, i8 %a3) {
; CHECK-LABEL: test_reassoc_minu_i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: andi a3, a3, 255
; CHECK-NEXT: andi a1, a1, 255
; CHECK-NEXT: andi a0, a0, 255
-; CHECK-NEXT: andi a2, a2, 255
; CHECK-NEXT: minu a0, a0, a1
+; CHECK-NEXT: andi a2, a2, 255
+; CHECK-NEXT: andi a3, a3, 255
; CHECK-NEXT: minu a1, a2, a3
; CHECK-NEXT: minu a0, a0, a1
; CHECK-NEXT: ret
@@ -756,11 +756,11 @@ define i8 @test_reassoc_minu_i8(i8 %a0, i8 %a1, i8 %a2, i8 %a3) {
define i16 @test_reassoc_minu_i16(i16 %a0, i16 %a1, i16 %a2, i16 %a3) {
; CHECK-LABEL: test_reassoc_minu_i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: zext.h a3, a3
; CHECK-NEXT: zext.h a1, a1
; CHECK-NEXT: zext.h a0, a0
-; CHECK-NEXT: zext.h a2, a2
; CHECK-NEXT: minu a0, a0, a1
+; CHECK-NEXT: zext.h a2, a2
+; CHECK-NEXT: zext.h a3, a3
; CHECK-NEXT: minu a1, a2, a3
; CHECK-NEXT: minu a0, a0, a1
; CHECK-NEXT: ret
@@ -773,11 +773,11 @@ define i16 @test_reassoc_minu_i16(i16 %a0, i16 %a1, i16 %a2, i16 %a3) {
define i32 @test_reassoc_minu_i32(i32 %a0, i32 %a1, i32 %a2, i32 %a3) {
; CHECK-LABEL: test_reassoc_minu_i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: sext.w a3, a3
; CHECK-NEXT: sext.w a1, a1
; CHECK-NEXT: sext.w a0, a0
-; CHECK-NEXT: sext.w a2, a2
; CHECK-NEXT: minu a0, a0, a1
+; CHECK-NEXT: sext.w a2, a2
+; CHECK-NEXT: sext.w a3, a3
; CHECK-NEXT: minu a1, a2, a3
; CHECK-NEXT: minu a0, a0, a1
; CHECK-NEXT: ret
@@ -803,11 +803,11 @@ define i64 @test_reassoc_minu_i64(i64 %a0, i64 %a1, i64 %a2, i64 %a3) {
define i8 @test_reassoc_min_i8(i8 %a0, i8 %a1, i8 %a2, i8 %a3) {
; CHECK-LABEL: test_reassoc_min_i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: sext.b a3, a3
; CHECK-NEXT: sext.b a1, a1
; CHECK-NEXT: sext.b a0, a0
-; CHECK-NEXT: sext.b a2, a2
; CHECK-NEXT: min a0, a0, a1
+; CHECK-NEXT: sext.b a2, a2
+; CHECK-NEXT: sext.b a3, a3
; CHECK-NEXT: min a1, a2, a3
; CHECK-NEXT: min a0, a0, a1
; CHECK-NEXT: ret
@@ -820,11 +820,11 @@ define i8 @test_reassoc_min_i8(i8 %a0, i8 %a1, i8 %a2, i8 %a3) {
define i16 @test_reassoc_min_i16(i16 %a0, i16 %a1, i16 %a2, i16 %a3) {
; CHECK-LABEL: test_reassoc_min_i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: sext.h a3, a3
; CHECK-NEXT: sext.h a1, a1
; CHECK-NEXT: sext.h a0, a0
-; CHECK-NEXT: sext.h a2, a2
; CHECK-NEXT: min a0, a0, a1
+; CHECK-NEXT: sext.h a2, a2
+; CHECK-NEXT: sext.h a3, a3
; CHECK-NEXT: min a1, a2, a3
; CHECK-NEXT: min a0, a0, a1
; CHECK-NEXT: ret
@@ -837,11 +837,11 @@ define i16 @test_reassoc_min_i16(i16 %a0, i16 %a1, i16 %a2, i16 %a3) {
define i32 @test_reassoc_min_i32(i32 %a0, i32 %a1, i32 %a2, i32 %a3) {
; CHECK-LABEL: test_reassoc_min_i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: sext.w a3, a3
; CHECK-NEXT: sext.w a1, a1
; CHECK-NEXT: sext.w a0, a0
-; CHECK-NEXT: sext.w a2, a2
; CHECK-NEXT: min a0, a0, a1
+; CHECK-NEXT: sext.w a2, a2
+; CHECK-NEXT: sext.w a3, a3
; CHECK-NEXT: min a1, a2, a3
; CHECK-NEXT: min a0, a0, a1
; CHECK-NEXT: ret
@@ -867,11 +867,11 @@ define i64 @test_reassoc_min_i64(i64 %a0, i64 %a1, i64 %a2, i64 %a3) {
define i8 @test_reassoc_maxu_i8(i8 %a0, i8 %a1, i8 %a2, i8 %a3) {
; CHECK-LABEL: test_reassoc_maxu_i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: andi a3, a3, 255
; CHECK-NEXT: andi a1, a1, 255
; CHECK-NEXT: andi a0, a0, 255
-; CHECK-NEXT: andi a2, a2, 255
; CHECK-NEXT: maxu a0, a0, a1
+; CHECK-NEXT: andi a2, a2, 255
+; CHECK-NEXT: andi a3, a3, 255
; CHECK-NEXT: maxu a1, a2, a3
; CHECK-NEXT: maxu a0, a0, a1
; CHECK-NEXT: ret
@@ -884,11 +884,11 @@ define i8 @test_reassoc_maxu_i8(i8 %a0, i8 %a1, i8 %a2, i8 %a3) {
define i16 @test_reassoc_maxu_i16(i16 %a0, i16 %a1, i16 %a2, i16 %a3) {
; CHECK-LABEL: test_reassoc_maxu_i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: zext.h a3, a3
; CHECK-NEXT: zext.h a1, a1
; CHECK-NEXT: zext.h a0, a0
-; CHECK-NEXT: zext.h a2, a2
; CHECK-NEXT: maxu a0, a0, a1
+; CHECK-NEXT: zext.h a2, a2
+; CHECK-NEXT: zext.h a3, a3
; CHECK-NEXT: maxu a1, a2, a3
; CHECK-NEXT: maxu a0, a0, a1
; CHECK-NEXT: ret
@@ -901,11 +901,11 @@ define i16 @test_reassoc_maxu_i16(i16 %a0, i16 %a1, i16 %a2, i16 %a3) {
define i32 @test_reassoc_maxu_i32(i32 %a0, i32 %a1, i32 %a2, i32 %a3) {
; CHECK-LABEL: test_reassoc_maxu_i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: sext.w a3, a3
; CHECK-NEXT: sext.w a1, a1
; CHECK-NEXT: sext.w a0, a0
-; CHECK-NEXT: sext.w a2, a2
; CHECK-NEXT: maxu a0, a0, a1
+; CHECK-NEXT: sext.w a2, a2
+; CHECK-NEXT: sext.w a3, a3
; CHECK-NEXT: maxu a1, a2, a3
; CHECK-NEXT: maxu a0, a0, a1
; CHECK-NEXT: ret
@@ -931,11 +931,11 @@ define i64 @test_reassoc_maxu_i64(i64 %a0, i64 %a1, i64 %a2, i64 %a3) {
define i8 @test_reassoc_max_i8(i8 %a0, i8 %a1, i8 %a2, i8 %a3) {
; CHECK-LABEL: test_reassoc_max_i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: sext.b a3, a3
; CHECK-NEXT: sext.b a1, a1
; CHECK-NEXT: sext.b a0, a0
-; CHECK-NEXT: sext.b a2, a2
; CHECK-NEXT: max a0, a0, a1
+; CHECK-NEXT: sext.b a2, a2
+; CHECK-NEXT: sext.b a3, a3
; CHECK-NEXT: max a1, a2, a3
; CHECK-NEXT: max a0, a0, a1
; CHECK-NEXT: ret
@@ -948,11 +948,11 @@ define i8 @test_reassoc_max_i8(i8 %a0, i8 %a1, i8 %a2, i8 %a3) {
define i16 @test_reassoc_max_i16(i16 %a0, i16 %a1, i16 %a2, i16 %a3) {
; CHECK-LABEL: test_reassoc_max_i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: sext.h a3, a3
; CHECK-NEXT: sext.h a1, a1
; CHECK-NEXT: sext.h a0, a0
-; CHECK-NEXT: sext.h a2, a2
; CHECK-NEXT: max a0, a0, a1
+; CHECK-NEXT: sext.h a2, a2
+; CHECK-NEXT: sext.h a3, a3
; CHECK-NEXT: max a1, a2, a3
; CHECK-NEXT: max a0, a0, a1
; CHECK-NEXT: ret
@@ -965,11 +965,11 @@ define i16 @test_reassoc_max_i16(i16 %a0, i16 %a1, i16 %a2, i16 %a3) {
define i32 @test_reassoc_max_i32(i32 %a0, i32 %a1, i32 %a2, i32 %a3) {
; CHECK-LABEL: test_reassoc_max_i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: sext.w a3, a3
; CHECK-NEXT: sext.w a1, a1
; CHECK-NEXT: sext.w a0, a0
-; CHECK-NEXT: sext.w a2, a2
; CHECK-NEXT: max a0, a0, a1
+; CHECK-NEXT: sext.w a2, a2
+; CHECK-NEXT: sext.w a3, a3
; CHECK-NEXT: max a1, a2, a3
; CHECK-NEXT: max a0, a0, a1
; CHECK-NEXT: ret
@@ -995,8 +995,8 @@ define i64 @test_reassoc_max_i64(i64 %a0, i64 %a1, i64 %a2, i64 %a3) {
define half @test_fmin_f16(half %a0, half %a1, half %a2, half %a3) {
; CHECK-LABEL: test_fmin_f16:
; CHECK: # %bb.0:
-; CHECK-NEXT: fmin.h fa5, fa2, fa3
; CHECK-NEXT: fmin.h fa4, fa0, fa1
+; CHECK-NEXT: fmin.h fa5, fa2, fa3
; CHECK-NEXT: fmin.h fa0, fa4, fa5
; CHECK-NEXT: ret
%t0 = call half @llvm.minnum.f16(half %a0, half %a1)
@@ -1008,8 +1008,8 @@ define half @test_fmin_f16(half %a0, half %a1, half %a2, half %a3) {
define float @test_fmin_f32(float %a0, float %a1, float %a2, float %a3) {
; CHECK-LABEL: test_fmin_f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: fmin.s fa5, fa2, fa3
; CHECK-NEXT: fmin.s fa4, fa0, fa1
+; CHECK-NEXT: fmin.s fa5, fa2, fa3
; CHECK-NEXT: fmin.s fa0, fa4, fa5
; CHECK-NEXT: ret
%t0 = call float @llvm.minnum.f32(float %a0, float %a1)
@@ -1021,8 +1021,8 @@ define float @test_fmin_f32(float %a0, float %a1, float %a2, float %a3) {
define double @test_fmin_f64(double %a0, double %a1, double %a2, double %a3) {
; CHECK-LABEL: test_fmin_f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: fmin.d fa5, fa2, fa3
; CHECK-NEXT: fmin.d fa4, fa0, fa1
+; CHECK-NEXT: fmin.d fa5, fa2, fa3
; CHECK-NEXT: fmin.d fa0, fa4, fa5
; CHECK-NEXT: ret
%t0 = call double @llvm.minnum.f64(double %a0, double %a1)
@@ -1034,8 +1034,8 @@ define double @test_fmin_f64(double %a0, double %a1, double %a2, double %a3) {
define half @test_fmax_f16(half %a0, half %a1, half %a2, half %a3) {
; CHECK-LABEL: test_fmax_f16:
; CHECK: # %bb.0:
-; CHECK-NEXT: fmax.h fa5, fa2, fa3
; CHECK-NEXT: fmax.h fa4, fa0, fa1
+; CHECK-NEXT: fmax.h fa5, fa2, fa3
; CHECK-NEXT: fmax.h fa0, fa4, fa5
; CHECK-NEXT: ret
%t0 = call half @llvm.maxnum.f16(half %a0, half %a1)
@@ -1047,8 +1047,8 @@ define half @test_fmax_f16(half %a0, half %a1, half %a2, half %a3) {
define float @test_fmax_f32(float %a0, float %a1, float %a2, float %a3) {
; CHECK-LABEL: test_fmax_f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: fmax.s fa5, fa2, fa3
; CHECK-NEXT: fmax.s fa4, fa0, fa1
+; CHECK-NEXT: fmax.s fa5, fa2, fa3
; CHECK-NEXT: fmax.s fa0, fa4, fa5
; CHECK-NEXT: ret
%t0 = call float @llvm.maxnum.f32(float %a0, float %a1)
@@ -1060,8 +1060,8 @@ define float @test_fmax_f32(float %a0, float %a1, float %a2, float %a3) {
define double @test_fmax_f64(double %a0, double %a1, double %a2, double %a3) {
; CHECK-LABEL: test_fmax_f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: fmax.d fa5, fa2, fa3
; CHECK-NEXT: fmax.d fa4, fa0, fa1
+; CHECK-NEXT: fmax.d fa5, fa2, fa3
; CHECK-NEXT: fmax.d fa0, fa4, fa5
; CHECK-NEXT: ret
%t0 = call double @llvm.maxnum.f64(double %a0, double %a1)
@@ -1097,9 +1097,9 @@ define double @test_fmadd_strategy(double %a0, double %a1, double %a2, double %a
; CHECK_LOCAL-LABEL: test_fmadd_strategy:
; CHECK_LOCAL: # %bb.0: # %entry
; CHECK_LOCAL-NEXT: fsub.d fa4, fa0, fa1
-; CHECK_LOCAL-NEXT: andi a0, a0, 1
; CHECK_LOCAL-NEXT: fmv.d fa5, fa0
; CHECK_LOCAL-NEXT: fmul.d fa0, fa4, fa2
+; CHECK_LOCAL-NEXT: andi a0, a0, 1
; CHECK_LOCAL-NEXT: beqz a0, .LBB76_2
; CHECK_LOCAL-NEXT: # %bb.1: # %entry
; CHECK_LOCAL-NEXT: fmul.d fa4, fa5, fa1
@@ -1111,9 +1111,9 @@ define double @test_fmadd_strategy(double %a0, double %a1, double %a2, double %a
; CHECK_GLOBAL-LABEL: test_fmadd_strategy:
; CHECK_GLOBAL: # %bb.0: # %entry
; CHECK_GLOBAL-NEXT: fsub.d fa4, fa0, fa1
-; CHECK_GLOBAL-NEXT: andi a0, a0, 1
; CHECK_GLOBAL-NEXT: fmv.d fa5, fa0
; CHECK_GLOBAL-NEXT: fmul.d fa0, fa4, fa2
+; CHECK_GLOBAL-NEXT: andi a0, a0, 1
; CHECK_GLOBAL-NEXT: beqz a0, .LBB76_2
; CHECK_GLOBAL-NEXT: # %bb.1: # %entry
; CHECK_GLOBAL-NEXT: fmul.d fa5, fa5, fa1
diff --git a/llvm/test/CodeGen/RISCV/misched-mem-clustering.mir b/llvm/test/CodeGen/RISCV/misched-mem-clustering.mir
index 21398d315ec93a..9ae559d5551ec7 100644
--- a/llvm/test/CodeGen/RISCV/misched-mem-clustering.mir
+++ b/llvm/test/CodeGen/RISCV/misched-mem-clustering.mir
@@ -3,12 +3,12 @@
# RUN: -riscv-postmisched-load-store-clustering=false -debug-only=machine-scheduler \
# RUN: -start-before=machine-scheduler -stop-after=postmisched -misched-regpressure=false -o - 2>&1 < %s \
# RUN: | FileCheck -check-prefix=NOPOSTMISCHED %s
-# RUN: llc -mtriple=riscv64 -x mir -mcpu=sifive-p470 -mattr=+use-postra-scheduler -verify-misched -enable-post-misched=true \
-# RUN: -riscv-postmisched-load-store-clustering=false -debug-only=machine-scheduler \
+# RUN: llc -mtriple=riscv64 -x mir -mcpu=sifive-p470 -mattr=+use-postra-scheduler -misched-postra-direction=topdown -verify-misched \
+# RUN: -enable-post-misched=true -riscv-postmisched-load-store-clustering=false -debug-only=machine-scheduler \
# RUN: -start-before=machine-scheduler -stop-after=postmisched -misched-regpressure=false -o - 2>&1 < %s \
# RUN: | FileCheck -check-prefix=NOCLUSTER %s
-# RUN: llc -mtriple=riscv64 -x mir -mcpu=sifive-p470 -mattr=+use-postra-scheduler -verify-misched -enable-post-misched=true \
-# RUN: -debug-only=machine-scheduler \
+# RUN: llc -mtriple=riscv64 -x mir -mcpu=sifive-p470 -mattr=+use-postra-scheduler -misched-postra-direction=topdown -verify-misched \
+# RUN: -enable-post-misched=true -debug-only=machine-scheduler \
# RUN: -start-before=machine-scheduler -stop-after=postmisched -misched-regpressure=false -o - 2>&1 < %s \
# RUN: | FileCheck -check-prefix=MEMCLUSTER %s
diff --git a/llvm/test/CodeGen/RISCV/rvv/vxrm-insert-out-of-loop.ll b/llvm/test/CodeGen/RISCV/rvv/vxrm-insert-out-of-loop.ll
index c35f05be304cce..506be459c768f0 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vxrm-insert-out-of-loop.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vxrm-insert-out-of-loop.ll
@@ -156,40 +156,37 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
; RV64P670-NEXT: # %bb.2: # %for.cond1.preheader.us.preheader
; RV64P670-NEXT: addi sp, sp, -48
; RV64P670-NEXT: .cfi_def_cfa_offset 48
-; RV64P670-NEXT: sd s0, 40(sp) # 8-byte Folded Spill
-; RV64P670-NEXT: sd s1, 32(sp) # 8-byte Folded Spill
-; RV64P670-NEXT: sd s2, 24(sp) # 8-byte Folded Spill
-; RV64P670-NEXT: sd s3, 16(sp) # 8-byte Folded Spill
; RV64P670-NEXT: sd s4, 8(sp) # 8-byte Folded Spill
+; RV64P670-NEXT: sd s3, 16(sp) # 8-byte Folded Spill
+; RV64P670-NEXT: sd s2, 24(sp) # 8-byte Folded Spill
+; RV64P670-NEXT: sd s1, 32(sp) # 8-byte Folded Spill
+; RV64P670-NEXT: sd s0, 40(sp) # 8-byte Folded Spill
; RV64P670-NEXT: .cfi_offset s0, -8
; RV64P670-NEXT: .cfi_offset s1, -16
; RV64P670-NEXT: .cfi_offset s2, -24
; RV64P670-NEXT: .cfi_offset s3, -32
; RV64P670-NEXT: .cfi_offset s4, -40
; RV64P670-NEXT: addi s1, a7, -1
-; RV64P670-NEXT: add s0, a0, a6
-; RV64P670-NEXT: li t0, 0
-; RV64P670-NEXT: li t1, 0
; RV64P670-NEXT: zext.w s1, s1
; RV64P670-NEXT: mul t2, a1, s1
+; RV64P670-NEXT: add s0, a0, a6
; RV64P670-NEXT: add t4, s0, t2
; RV64P670-NEXT: mul t2, a3, s1
; RV64P670-NEXT: add s0, a2, a6
-; RV64P670-NEXT: mul s1, a5, s1
; RV64P670-NEXT: add t3, s0, t2
+; RV64P670-NEXT: mul s1, a5, s1
; RV64P670-NEXT: add s0, a4, a6
-; RV64P670-NEXT: csrr t2, vlenb
; RV64P670-NEXT: add t5, s0, s1
+; RV64P670-NEXT: csrr t2, vlenb
; RV64P670-NEXT: sltu s1, a0, t3
; RV64P670-NEXT: sltu s0, a2, t4
-; RV64P670-NEXT: slli t3, t2, 1
; RV64P670-NEXT: and s0, s0, s1
; RV64P670-NEXT: or s1, a1, a3
; RV64P670-NEXT: slti s1, s1, 0
; RV64P670-NEXT: or t6, s0, s1
+; RV64P670-NEXT: slli t3, t2, 1
; RV64P670-NEXT: sltu s1, a0, t5
; RV64P670-NEXT: sltu s0, a4, t4
-; RV64P670-NEXT: mv t5, a0
; RV64P670-NEXT: and s0, s0, s1
; RV64P670-NEXT: or s1, a1, a5
; RV64P670-NEXT: slti s1, s1, 0
@@ -199,15 +196,18 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
; RV64P670-NEXT: or s0, t6, s0
; RV64P670-NEXT: sltu s1, a6, s1
; RV64P670-NEXT: or s0, s0, s1
+; RV64P670-NEXT: mv t5, a0
; RV64P670-NEXT: andi t4, s0, 1
+; RV64P670-NEXT: li t1, 0
+; RV64P670-NEXT: li t0, 0
; RV64P670-NEXT: j .LBB0_4
; RV64P670-NEXT: .LBB0_3: # %for.cond1.for.cond.cleanup3_crit_edge.us
; RV64P670-NEXT: # in Loop: Header=BB0_4 Depth=1
-; RV64P670-NEXT: add t5, t5, a1
-; RV64P670-NEXT: add a2, a2, a3
-; RV64P670-NEXT: add a4, a4, a5
-; RV64P670-NEXT: addiw t1, t1, 1
; RV64P670-NEXT: addi t0, t0, 1
+; RV64P670-NEXT: addiw t1, t1, 1
+; RV64P670-NEXT: add a4, a4, a5
+; RV64P670-NEXT: add a2, a2, a3
+; RV64P670-NEXT: add t5, t5, a1
; RV64P670-NEXT: beq t1, a7, .LBB0_11
; RV64P670-NEXT: .LBB0_4: # %for.cond1.preheader.us
; RV64P670-NEXT: # =>This Loop Header: Depth=1
@@ -220,24 +220,24 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
; RV64P670-NEXT: .LBB0_6: # %vector.ph
; RV64P670-NEXT: # in Loop: Header=BB0_4 Depth=1
; RV64P670-NEXT: slli s1, t2, 28
-; RV64P670-NEXT: mv s2, a2
-; RV64P670-NEXT: mv s3, a4
-; RV64P670-NEXT: mv s4, t5
; RV64P670-NEXT: sub s1, s1, t3
-; RV64P670-NEXT: vsetvli s0, zero, e8, m2, ta, ma
; RV64P670-NEXT: and t6, s1, a6
+; RV64P670-NEXT: vsetvli s0, zero, e8, m2, ta, ma
; RV64P670-NEXT: mv s1, t6
+; RV64P670-NEXT: mv s4, t5
+; RV64P670-NEXT: mv s3, a4
+; RV64P670-NEXT: mv s2, a2
; RV64P670-NEXT: .LBB0_7: # %vector.body
; RV64P670-NEXT: # Parent Loop BB0_4 Depth=1
; RV64P670-NEXT: # => This Inner Loop Header: Depth=2
; RV64P670-NEXT: vl2r.v v8, (s2)
-; RV64P670-NEXT: sub s1, s1, t3
-; RV64P670-NEXT: add s2, s2, t3
; RV64P670-NEXT: vl2r.v v10, (s3)
-; RV64P670-NEXT: add s3, s3, t3
; RV64P670-NEXT: vaaddu.vv v8, v8, v10
; RV64P670-NEXT: vs2r.v v8, (s4)
+; RV64P670-NEXT: add s2, s2, t3
+; RV64P670-NEXT: add s3, s3, t3
; RV64P670-NEXT: add s4, s4, t3
+; RV64P670-NEXT: sub s1, s1, t3
; RV64P670-NEXT: bnez s1, .LBB0_7
; RV64P670-NEXT: # %bb.8: # %middle.block
; RV64P670-NEXT: # in Loop: Header=BB0_4 Depth=1
@@ -246,8 +246,8 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
; RV64P670-NEXT: # in Loop: Header=BB0_4 Depth=1
; RV64P670-NEXT: mul s2, a1, t0
; RV64P670-NEXT: add s0, a0, a6
-; RV64P670-NEXT: add s1, t5, t6
; RV64P670-NEXT: add s4, a4, t6
+; RV64P670-NEXT: add s1, t5, t6
; RV64P670-NEXT: add t6, t6, a2
; RV64P670-NEXT: add s2, s2, s0
; RV64P670-NEXT: .LBB0_10: # %for.body4.us
@@ -255,21 +255,21 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_
; RV64P670-NEXT: # => This Inner Loop Header: Depth=2
; RV64P670-NEXT: lbu s3, 0(t6)
; RV64P670-NEXT: lbu s0, 0(s4)
-; RV64P670-NEXT: addi s4, s4, 1
-; RV64P670-NEXT: addi t6, t6, 1
; RV64P670-NEXT: add s0, s0, s3
; RV64P670-NEXT: addi s0, s0, 1
; RV64P670-NEXT: srli s0, s0, 1
; RV64P670-NEXT: sb s0, 0(s1)
+; RV64P670-NEXT: addi t6, t6, 1
+; RV64P670-NEXT: addi s4, s4, 1
; RV64P670-NEXT: addi s1, s1, 1
; RV64P670-NEXT: bne s1, s2, .LBB0_10
; RV64P670-NEXT: j .LBB0_3
; RV64P670-NEXT: .LBB0_11:
-; RV64P670-NEXT: ld s0, 40(sp) # 8-byte Folded Reload
-; RV64P670-NEXT: ld s1, 32(sp) # 8-byte Folded Reload
-; RV64P670-NEXT: ld s2, 24(sp) # 8-byte Folded Reload
-; RV64P670-NEXT: ld s3, 16(sp) # 8-byte Folded Reload
; RV64P670-NEXT: ld s4, 8(sp) # 8-byte Folded Reload
+; RV64P670-NEXT: ld s3, 16(sp) # 8-byte Folded Reload
+; RV64P670-NEXT: ld s2, 24(sp) # 8-byte Folded Reload
+; RV64P670-NEXT: ld s1, 32(sp) # 8-byte Folded Reload
+; RV64P670-NEXT: ld s0, 40(sp) # 8-byte Folded Reload
; RV64P670-NEXT: .cfi_restore s0
; RV64P670-NEXT: .cfi_restore s1
; RV64P670-NEXT: .cfi_restore s2
diff --git a/llvm/test/CodeGen/RISCV/short-forward-branch-opt.ll b/llvm/test/CodeGen/RISCV/short-forward-branch-opt.ll
index b7b88584f3bdb8..a624306d7dd1ce 100644
--- a/llvm/test/CodeGen/RISCV/short-forward-branch-opt.ll
+++ b/llvm/test/CodeGen/RISCV/short-forward-branch-opt.ll
@@ -69,39 +69,39 @@ define signext i32 @test3(i32 signext %v, i32 signext %w, i32 signext %x, i32 si
;
; RV64SFB-LABEL: test3:
; RV64SFB: # %bb.0:
-; RV64SFB-NEXT: beqz a4, .LBB2_2
+; RV64SFB-NEXT: bnez a4, .LBB2_2
; RV64SFB-NEXT: # %bb.1:
-; RV64SFB-NEXT: mv a2, a3
+; RV64SFB-NEXT: mv a0, a1
; RV64SFB-NEXT: .LBB2_2:
-; RV64SFB-NEXT: bnez a4, .LBB2_4
+; RV64SFB-NEXT: beqz a4, .LBB2_4
; RV64SFB-NEXT: # %bb.3:
-; RV64SFB-NEXT: mv a0, a1
+; RV64SFB-NEXT: mv a2, a3
; RV64SFB-NEXT: .LBB2_4:
; RV64SFB-NEXT: addw a0, a0, a2
; RV64SFB-NEXT: ret
;
; ZICOND-LABEL: test3:
; ZICOND: # %bb.0:
-; ZICOND-NEXT: beqz a4, .LBB2_2
+; ZICOND-NEXT: bnez a4, .LBB2_2
; ZICOND-NEXT: # %bb.1:
-; ZICOND-NEXT: mv a2, a3
+; ZICOND-NEXT: mv a0, a1
; ZICOND-NEXT: .LBB2_2:
-; ZICOND-NEXT: bnez a4, .LBB2_4
+; ZICOND-NEXT: beqz a4, .LBB2_4
; ZICOND-NEXT: # %bb.3:
-; ZICOND-NEXT: mv a0, a1
+; ZICOND-NEXT: mv a2, a3
; ZICOND-NEXT: .LBB2_4:
; ZICOND-NEXT: addw a0, a0, a2
; ZICOND-NEXT: ret
;
; RV32SFB-LABEL: test3:
; RV32SFB: # %bb.0:
-; RV32SFB-NEXT: beqz a4, .LBB2_2
+; RV32SFB-NEXT: bnez a4, .LBB2_2
; RV32SFB-NEXT: # %bb.1:
-; RV32SFB-NEXT: mv a2, a3
+; RV32SFB-NEXT: mv a0, a1
; RV32SFB-NEXT: .LBB2_2:
-; RV32SFB-NEXT: bnez a4, .LBB2_4
+; RV32SFB-NEXT: beqz a4, .LBB2_4
; RV32SFB-NEXT: # %bb.3:
-; RV32SFB-NEXT: mv a0, a1
+; RV32SFB-NEXT: mv a2, a3
; RV32SFB-NEXT: .LBB2_4:
; RV32SFB-NEXT: add a0, a0, a2
; RV32SFB-NEXT: ret
@@ -444,16 +444,16 @@ define void @sextw_removal_ccor(i1 %c, i32 signext %arg, i32 signext %arg1, i32
; RV64SFB-LABEL: sextw_removal_ccor:
; RV64SFB: # %bb.0: # %bb
; RV64SFB-NEXT: addi sp, sp, -32
-; RV64SFB-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64SFB-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
; RV64SFB-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; RV64SFB-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64SFB-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
; RV64SFB-NEXT: mv s0, a3
; RV64SFB-NEXT: andi a0, a0, 1
-; RV64SFB-NEXT: mv s1, a2
; RV64SFB-NEXT: beqz a0, .LBB15_4
; RV64SFB-NEXT: # %bb.3: # %bb
; RV64SFB-NEXT: or s0, a3, a1
; RV64SFB-NEXT: .LBB15_4: # %bb
+; RV64SFB-NEXT: mv s1, a2
; RV64SFB-NEXT: .LBB15_1: # %bb2
; RV64SFB-NEXT: # =>This Inner Loop Header: Depth=1
; RV64SFB-NEXT: mv a0, s0
@@ -461,25 +461,25 @@ define void @sextw_removal_ccor(i1 %c, i32 signext %arg, i32 signext %arg1, i32
; RV64SFB-NEXT: sllw s0, s0, s1
; RV64SFB-NEXT: bnez a0, .LBB15_1
; RV64SFB-NEXT: # %bb.2: # %bb7
-; RV64SFB-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64SFB-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
; RV64SFB-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64SFB-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64SFB-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; RV64SFB-NEXT: addi sp, sp, 32
; RV64SFB-NEXT: ret
;
; ZICOND-LABEL: sextw_removal_ccor:
; ZICOND: # %bb.0: # %bb
; ZICOND-NEXT: addi sp, sp, -32
-; ZICOND-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; ZICOND-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
; ZICOND-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
+; ZICOND-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; ZICOND-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
; ZICOND-NEXT: mv s0, a3
; ZICOND-NEXT: andi a0, a0, 1
-; ZICOND-NEXT: mv s1, a2
; ZICOND-NEXT: beqz a0, .LBB15_4
; ZICOND-NEXT: # %bb.3: # %bb
; ZICOND-NEXT: or s0, a3, a1
; ZICOND-NEXT: .LBB15_4: # %bb
+; ZICOND-NEXT: mv s1, a2
; ZICOND-NEXT: .LBB15_1: # %bb2
; ZICOND-NEXT: # =>This Inner Loop Header: Depth=1
; ZICOND-NEXT: mv a0, s0
@@ -487,25 +487,25 @@ define void @sextw_removal_ccor(i1 %c, i32 signext %arg, i32 signext %arg1, i32
; ZICOND-NEXT: sllw s0, s0, s1
; ZICOND-NEXT: bnez a0, .LBB15_1
; ZICOND-NEXT: # %bb.2: # %bb7
-; ZICOND-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; ZICOND-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
; ZICOND-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; ZICOND-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; ZICOND-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; ZICOND-NEXT: addi sp, sp, 32
; ZICOND-NEXT: ret
;
; RV32SFB-LABEL: sextw_removal_ccor:
; RV32SFB: # %bb.0: # %bb
; RV32SFB-NEXT: addi sp, sp, -16
-; RV32SFB-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32SFB-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; RV32SFB-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
+; RV32SFB-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32SFB-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32SFB-NEXT: mv s0, a3
; RV32SFB-NEXT: andi a0, a0, 1
-; RV32SFB-NEXT: mv s1, a2
; RV32SFB-NEXT: beqz a0, .LBB15_4
; RV32SFB-NEXT: # %bb.3: # %bb
; RV32SFB-NEXT: or s0, a3, a1
; RV32SFB-NEXT: .LBB15_4: # %bb
+; RV32SFB-NEXT: mv s1, a2
; RV32SFB-NEXT: .LBB15_1: # %bb2
; RV32SFB-NEXT: # =>This Inner Loop Header: Depth=1
; RV32SFB-NEXT: mv a0, s0
@@ -513,9 +513,9 @@ define void @sextw_removal_ccor(i1 %c, i32 signext %arg, i32 signext %arg1, i32
; RV32SFB-NEXT: sll s0, s0, s1
; RV32SFB-NEXT: bnez a0, .LBB15_1
; RV32SFB-NEXT: # %bb.2: # %bb7
-; RV32SFB-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32SFB-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32SFB-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32SFB-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32SFB-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32SFB-NEXT: addi sp, sp, 16
; RV32SFB-NEXT: ret
bb:
@@ -563,16 +563,16 @@ define void @sextw_removal_ccaddw(i1 %c, i32 signext %arg, i32 signext %arg1, i3
; RV64SFB-LABEL: sextw_removal_ccaddw:
; RV64SFB: # %bb.0: # %bb
; RV64SFB-NEXT: addi sp, sp, -32
-; RV64SFB-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; RV64SFB-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
; RV64SFB-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; RV64SFB-NEXT: mv s1, a1
+; RV64SFB-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; RV64SFB-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
; RV64SFB-NEXT: andi a0, a0, 1
-; RV64SFB-NEXT: mv s0, a2
+; RV64SFB-NEXT: mv s1, a1
; RV64SFB-NEXT: beqz a0, .LBB16_4
; RV64SFB-NEXT: # %bb.3: # %bb
; RV64SFB-NEXT: addw s1, a1, a3
; RV64SFB-NEXT: .LBB16_4: # %bb
+; RV64SFB-NEXT: mv s0, a2
; RV64SFB-NEXT: .LBB16_1: # %bb2
; RV64SFB-NEXT: # =>This Inner Loop Header: Depth=1
; RV64SFB-NEXT: mv a0, s1
@@ -580,25 +580,25 @@ define void @sextw_removal_ccaddw(i1 %c, i32 signext %arg, i32 signext %arg1, i3
; RV64SFB-NEXT: sllw s1, s1, s0
; RV64SFB-NEXT: bnez a0, .LBB16_1
; RV64SFB-NEXT: # %bb.2: # %bb7
-; RV64SFB-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; RV64SFB-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
; RV64SFB-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; RV64SFB-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; RV64SFB-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; RV64SFB-NEXT: addi sp, sp, 32
; RV64SFB-NEXT: ret
;
; ZICOND-LABEL: sextw_removal_ccaddw:
; ZICOND: # %bb.0: # %bb
; ZICOND-NEXT: addi sp, sp, -32
-; ZICOND-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
-; ZICOND-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
; ZICOND-NEXT: sd s1, 8(sp) # 8-byte Folded Spill
-; ZICOND-NEXT: mv s1, a1
+; ZICOND-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; ZICOND-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
; ZICOND-NEXT: andi a0, a0, 1
-; ZICOND-NEXT: mv s0, a2
+; ZICOND-NEXT: mv s1, a1
; ZICOND-NEXT: beqz a0, .LBB16_4
; ZICOND-NEXT: # %bb.3: # %bb
; ZICOND-NEXT: addw s1, a1, a3
; ZICOND-NEXT: .LBB16_4: # %bb
+; ZICOND-NEXT: mv s0, a2
; ZICOND-NEXT: .LBB16_1: # %bb2
; ZICOND-NEXT: # =>This Inner Loop Header: Depth=1
; ZICOND-NEXT: mv a0, s1
@@ -606,25 +606,25 @@ define void @sextw_removal_ccaddw(i1 %c, i32 signext %arg, i32 signext %arg1, i3
; ZICOND-NEXT: sllw s1, s1, s0
; ZICOND-NEXT: bnez a0, .LBB16_1
; ZICOND-NEXT: # %bb.2: # %bb7
-; ZICOND-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
-; ZICOND-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
; ZICOND-NEXT: ld s1, 8(sp) # 8-byte Folded Reload
+; ZICOND-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; ZICOND-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; ZICOND-NEXT: addi sp, sp, 32
; ZICOND-NEXT: ret
;
; RV32SFB-LABEL: sextw_removal_ccaddw:
; RV32SFB: # %bb.0: # %bb
; RV32SFB-NEXT: addi sp, sp, -16
-; RV32SFB-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
-; RV32SFB-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
; RV32SFB-NEXT: sw s1, 4(sp) # 4-byte Folded Spill
-; RV32SFB-NEXT: mv s1, a1
+; RV32SFB-NEXT: sw s0, 8(sp) # 4-byte Folded Spill
+; RV32SFB-NEXT: sw ra, 12(sp) # 4-byte Folded Spill
; RV32SFB-NEXT: andi a0, a0, 1
-; RV32SFB-NEXT: mv s0, a2
+; RV32SFB-NEXT: mv s1, a1
; RV32SFB-NEXT: beqz a0, .LBB16_4
; RV32SFB-NEXT: # %bb.3: # %bb
; RV32SFB-NEXT: add s1, a1, a3
; RV32SFB-NEXT: .LBB16_4: # %bb
+; RV32SFB-NEXT: mv s0, a2
; RV32SFB-NEXT: .LBB16_1: # %bb2
; RV32SFB-NEXT: # =>This Inner Loop Header: Depth=1
; RV32SFB-NEXT: mv a0, s1
@@ -632,9 +632,9 @@ define void @sextw_removal_ccaddw(i1 %c, i32 signext %arg, i32 signext %arg1, i3
; RV32SFB-NEXT: sll s1, s1, s0
; RV32SFB-NEXT: bnez a0, .LBB16_1
; RV32SFB-NEXT: # %bb.2: # %bb7
-; RV32SFB-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
-; RV32SFB-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
; RV32SFB-NEXT: lw s1, 4(sp) # 4-byte Folded Reload
+; RV32SFB-NEXT: lw s0, 8(sp) # 4-byte Folded Reload
+; RV32SFB-NEXT: lw ra, 12(sp) # 4-byte Folded Reload
; RV32SFB-NEXT: addi sp, sp, 16
; RV32SFB-NEXT: ret
bb:
@@ -813,8 +813,8 @@ define i64 @select_sll(i64 %A, i64 %B, i64 %C, i1 zeroext %cond) {
; RV32SFB-NEXT: not a7, a2
; RV32SFB-NEXT: srli a0, a0, 1
; RV32SFB-NEXT: sll t0, a1, a2
-; RV32SFB-NEXT: addi a2, a2, -32
; RV32SFB-NEXT: srl a0, a0, a7
+; RV32SFB-NEXT: addi a2, a2, -32
; RV32SFB-NEXT: mv a1, a3
; RV32SFB-NEXT: bltz a2, .LBB20_2
; RV32SFB-NEXT: # %bb.1: # %entry
@@ -828,11 +828,11 @@ define i64 @select_sll(i64 %A, i64 %B, i64 %C, i1 zeroext %cond) {
; RV32SFB-NEXT: # %bb.5: # %entry
; RV32SFB-NEXT: mv a3, a4
; RV32SFB-NEXT: .LBB20_6: # %entry
+; RV32SFB-NEXT: mv a0, a3
; RV32SFB-NEXT: beqz a6, .LBB20_8
; RV32SFB-NEXT: # %bb.7: # %entry
; RV32SFB-NEXT: mv a1, a5
; RV32SFB-NEXT: .LBB20_8: # %entry
-; RV32SFB-NEXT: mv a0, a3
; RV32SFB-NEXT: ret
entry:
%0 = shl i64 %A, %B
@@ -874,8 +874,8 @@ define i64 @select_srl(i64 %A, i64 %B, i64 %C, i1 zeroext %cond) {
; RV32SFB-NEXT: not a7, a2
; RV32SFB-NEXT: slli a1, a1, 1
; RV32SFB-NEXT: srl t0, a0, a2
-; RV32SFB-NEXT: addi a2, a2, -32
; RV32SFB-NEXT: sll a1, a1, a7
+; RV32SFB-NEXT: addi a2, a2, -32
; RV32SFB-NEXT: mv a0, a3
; RV32SFB-NEXT: bltz a2, .LBB21_2
; RV32SFB-NEXT: # %bb.1: # %entry
@@ -889,11 +889,11 @@ define i64 @select_srl(i64 %A, i64 %B, i64 %C, i1 zeroext %cond) {
; RV32SFB-NEXT: # %bb.5: # %entry
; RV32SFB-NEXT: mv a3, a5
; RV32SFB-NEXT: .LBB21_6: # %entry
+; RV32SFB-NEXT: mv a1, a3
; RV32SFB-NEXT: beqz a6, .LBB21_8
; RV32SFB-NEXT: # %bb.7: # %entry
; RV32SFB-NEXT: mv a0, a4
; RV32SFB-NEXT: .LBB21_8: # %entry
-; RV32SFB-NEXT: mv a1, a3
; RV32SFB-NEXT: ret
entry:
%0 = lshr i64 %A, %B
@@ -935,8 +935,8 @@ define i64 @select_sra(i64 %A, i64 %B, i64 %C, i1 zeroext %cond) {
; RV32SFB-NEXT: not a7, a2
; RV32SFB-NEXT: slli t0, a1, 1
; RV32SFB-NEXT: srl t1, a0, a2
-; RV32SFB-NEXT: addi a2, a2, -32
; RV32SFB-NEXT: sll a7, t0, a7
+; RV32SFB-NEXT: addi a2, a2, -32
; RV32SFB-NEXT: mv a0, a3
; RV32SFB-NEXT: bltz a2, .LBB22_2
; RV32SFB-NEXT: # %bb.1: # %entry
@@ -950,11 +950,11 @@ define i64 @select_sra(i64 %A, i64 %B, i64 %C, i1 zeroext %cond) {
; RV32SFB-NEXT: # %bb.5: # %entry
; RV32SFB-NEXT: mv a3, a5
; RV32SFB-NEXT: .LBB22_6: # %entry
+; RV32SFB-NEXT: mv a1, a3
; RV32SFB-NEXT: beqz a6, .LBB22_8
; RV32SFB-NEXT: # %bb.7: # %entry
; RV32SFB-NEXT: mv a0, a4
; RV32SFB-NEXT: .LBB22_8: # %entry
-; RV32SFB-NEXT: mv a1, a3
; RV32SFB-NEXT: ret
entry:
%0 = ashr i64 %A, %B
@@ -1044,8 +1044,8 @@ define i64 @select_addi(i64 %A, i64 %C, i1 zeroext %cond) {
; RV32SFB-NEXT: # %bb.3: # %entry
; RV32SFB-NEXT: add a3, a1, a0
; RV32SFB-NEXT: .LBB24_4: # %entry
-; RV32SFB-NEXT: mv a0, a5
; RV32SFB-NEXT: mv a1, a3
+; RV32SFB-NEXT: mv a0, a5
; RV32SFB-NEXT: ret
entry:
%0 = add i64 %A, 1234
@@ -1134,11 +1134,11 @@ define i64 @select_ori(i64 %A, i64 %C, i1 zeroext %cond) {
; RV32SFB-NEXT: # %bb.1: # %entry
; RV32SFB-NEXT: ori a2, a0, 890
; RV32SFB-NEXT: .LBB26_2: # %entry
+; RV32SFB-NEXT: mv a0, a2
; RV32SFB-NEXT: beqz a4, .LBB26_4
; RV32SFB-NEXT: # %bb.3: # %entry
; RV32SFB-NEXT: mv a1, a3
; RV32SFB-NEXT: .LBB26_4: # %entry
-; RV32SFB-NEXT: mv a0, a2
; RV32SFB-NEXT: ret
entry:
%0 = or i64 %A, 890
@@ -1180,11 +1180,11 @@ define i64 @select_xori(i64 %A, i64 %C, i1 zeroext %cond) {
; RV32SFB-NEXT: # %bb.1: # %entry
; RV32SFB-NEXT: xori a2, a0, 321
; RV32SFB-NEXT: .LBB27_2: # %entry
+; RV32SFB-NEXT: mv a0, a2
; RV32SFB-NEXT: beqz a4, .LBB27_4
; RV32SFB-NEXT: # %bb.3: # %entry
; RV32SFB-NEXT: mv a1, a3
; RV32SFB-NEXT: .LBB27_4: # %entry
-; RV32SFB-NEXT: mv a0, a2
; RV32SFB-NEXT: ret
entry:
%0 = xor i64 %A, 321
@@ -1316,8 +1316,8 @@ define i64 @select_srai(i64 %A, i64 %C, i1 zeroext %cond) {
;
; RV32SFB-LABEL: select_srai:
; RV32SFB: # %bb.0: # %entry
-; RV32SFB-NEXT: mv a0, a2
; RV32SFB-NEXT: srai a1, a1, 31
+; RV32SFB-NEXT: mv a0, a2
; RV32SFB-NEXT: bnez a4, .LBB30_2
; RV32SFB-NEXT: # %bb.1: # %entry
; RV32SFB-NEXT: mv a0, a1
@@ -1573,8 +1573,8 @@ define i64 @select_andn(i64 %A, i64 %B, i64 %C, i1 zeroext %cond) {
; RV32SFB-NEXT: # %bb.3: # %entry
; RV32SFB-NEXT: andn a4, a0, a2
; RV32SFB-NEXT: .LBB36_4: # %entry
-; RV32SFB-NEXT: mv a0, a4
; RV32SFB-NEXT: mv a1, a5
+; RV32SFB-NEXT: mv a0, a4
; RV32SFB-NEXT: ret
entry:
%0 = xor i64 %B, -1
@@ -1621,8 +1621,8 @@ define i64 @select_orn(i64 %A, i64 %B, i64 %C, i1 zeroext %cond) {
; RV32SFB-NEXT: # %bb.3: # %entry
; RV32SFB-NEXT: orn a4, a0, a2
; RV32SFB-NEXT: .LBB37_4: # %entry
-; RV32SFB-NEXT: mv a0, a4
; RV32SFB-NEXT: mv a1, a5
+; RV32SFB-NEXT: mv a0, a4
; RV32SFB-NEXT: ret
entry:
%0 = xor i64 %B, -1
@@ -1669,8 +1669,8 @@ define i64 @select_xnor(i64 %A, i64 %B, i64 %C, i1 zeroext %cond) {
; RV32SFB-NEXT: # %bb.3: # %entry
; RV32SFB-NEXT: xnor a4, a0, a2
; RV32SFB-NEXT: .LBB38_4: # %entry
-; RV32SFB-NEXT: mv a0, a4
; RV32SFB-NEXT: mv a1, a5
+; RV32SFB-NEXT: mv a0, a4
; RV32SFB-NEXT: ret
entry:
%0 = xor i64 %A, %B
More information about the llvm-commits
mailing list