[llvm] [RISCV] Mark V0 regclasses as larger superclasses of non-V0 classes (PR #70109)
Luke Lau via llvm-commits
llvm-commits@lists.llvm.org
Tue Oct 24 12:54:47 PDT 2023
https://github.com/lukel97 created https://github.com/llvm/llvm-project/pull/70109
This allows the register coalescer to inflate virtual registers after deleting
copies, e.g. from VRNoV0 to VR, which in turn avoids some spills.
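For context on the mechanism this hook feeds: after the coalescer joins copies, it tries to "inflate" the surviving virtual registers by asking the target for the largest legal super-class and re-constraining it against the remaining uses. Below is a minimal sketch of that step, paraphrasing MachineRegisterInfo::recomputeRegClass — the LLVM API names are real, but the simplified body and the helper name inflateRegClassSketch are illustrative assumptions, not the upstream implementation:

#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"

using namespace llvm;

// Sketch (assumption-level paraphrase): one step of register-class
// inflation for a single virtual register.
static bool inflateRegClassSketch(MachineRegisterInfo &MRI,
                                  const TargetRegisterInfo &TRI,
                                  const MachineFunction &MF, Register Reg) {
  const TargetRegisterClass *OldRC = MRI.getRegClass(Reg);
  // The hook overridden in this patch answers: "what is the widest class
  // this vreg could legally live in?" Returning OldRC means "cannot inflate".
  const TargetRegisterClass *NewRC = TRI.getLargestLegalSuperClass(OldRC, MF);
  if (NewRC == OldRC)
    return false;
  // Upstream re-constrains NewRC against every remaining use/def at this
  // point; if nothing still demands the narrower class (e.g. a mask operand
  // pinned away from V0), the vreg is widened.
  if (!NewRC->hasSubClassEq(OldRC))
    return false;
  MRI.setRegClass(Reg, NewRC);
  return true;
}

With the NoV0 classes mapped to their full counterparts, vregs that only landed in VRNoV0 because of since-deleted copies can return to VR, which is what removes the spills in the test diffs below.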
From 1cdef986f86f1bf7e22a5ab726058b6a438bdf65 Mon Sep 17 00:00:00 2001
From: Luke Lau <luke@igalia.com>
Date: Tue, 24 Oct 2023 20:47:59 +0100
Subject: [PATCH] [RISCV] Mark V0 regclasses as larger superclasses of non-V0
classes
This allows the register coalescer to inflate virtual registers after deleting
copies, e.g. from VRNoV0 to VR, which in turn avoids some spills.
---
llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp | 8 ++
...regalloc-last-chance-recoloring-failure.ll | 40 ++----
llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll | 32 ++---
llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll | 32 ++---
llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll | 18 +--
llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll | 4 +-
.../RISCV/rvv/fixed-vectors-bitreverse-vp.ll | 32 ++---
.../RISCV/rvv/fixed-vectors-bswap-vp.ll | 32 ++---
.../RISCV/rvv/fixed-vectors-ctlz-vp.ll | 8 +-
.../RISCV/rvv/fixed-vectors-cttz-vp.ll | 8 +-
.../RISCV/rvv/fixed-vectors-fmaximum.ll | 13 +-
.../RISCV/rvv/fixed-vectors-fminimum.ll | 13 +-
.../RISCV/rvv/fixed-vectors-trunc-vp.ll | 62 ++++-----
.../test/CodeGen/RISCV/rvv/fmaximum-sdnode.ll | 72 ++--------
.../test/CodeGen/RISCV/rvv/fminimum-sdnode.ll | 72 ++--------
llvm/test/CodeGen/RISCV/rvv/fshr-fshl-vp.ll | 126 ++++++++++--------
16 files changed, 211 insertions(+), 361 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
index fcfc5c7821ffe29..cd0bcb8e6cfc495 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
@@ -663,6 +663,14 @@ RISCVRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC,
const MachineFunction &) const {
if (RC == &RISCV::VMV0RegClass)
return &RISCV::VRRegClass;
+ if (RC == &RISCV::VRNoV0RegClass)
+ return &RISCV::VRRegClass;
+ if (RC == &RISCV::VRM2NoV0RegClass)
+ return &RISCV::VRM2RegClass;
+ if (RC == &RISCV::VRM4NoV0RegClass)
+ return &RISCV::VRM4RegClass;
+ if (RC == &RISCV::VRM8NoV0RegClass)
+ return &RISCV::VRM8RegClass;
return RC;
}
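For readability, the hook as it reads with this patch applied, assembled verbatim from the hunk above (the return type comes from the base TargetRegisterInfo hook):

const TargetRegisterClass *
RISCVRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC,
                                             const MachineFunction &) const {
  if (RC == &RISCV::VMV0RegClass)
    return &RISCV::VRRegClass;
  if (RC == &RISCV::VRNoV0RegClass)
    return &RISCV::VRRegClass;
  if (RC == &RISCV::VRM2NoV0RegClass)
    return &RISCV::VRM2RegClass;
  if (RC == &RISCV::VRM4NoV0RegClass)
    return &RISCV::VRM4RegClass;
  if (RC == &RISCV::VRM8NoV0RegClass)
    return &RISCV::VRM8RegClass;
  return RC;
}

Each masked-off (NoV0) class maps to its whole register group, so a value is only kept out of V0 while an instruction actually requires that.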
diff --git a/llvm/test/CodeGen/RISCV/regalloc-last-chance-recoloring-failure.ll b/llvm/test/CodeGen/RISCV/regalloc-last-chance-recoloring-failure.ll
index c15321057aeb86b..84ff1bf646280ef 100644
--- a/llvm/test/CodeGen/RISCV/regalloc-last-chance-recoloring-failure.ll
+++ b/llvm/test/CodeGen/RISCV/regalloc-last-chance-recoloring-failure.ll
@@ -20,17 +20,13 @@ define void @last_chance_recoloring_failure() {
; CHECK-NEXT: .cfi_offset ra, -8
; CHECK-NEXT: .cfi_offset s0, -16
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: li a1, 24
-; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: sub sp, sp, a0
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x20, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 + 24 * vlenb
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x20, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 + 16 * vlenb
; CHECK-NEXT: li a0, 55
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
; CHECK-NEXT: vloxseg2ei32.v v16, (a0), v8
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 3
-; CHECK-NEXT: add a0, sp, a0
-; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 2
; CHECK-NEXT: vs4r.v v16, (a0) # Unknown-size Folded Spill
@@ -42,47 +38,39 @@ define void @last_chance_recoloring_failure() {
; CHECK-NEXT: vsetvli zero, s0, e16, m4, ta, ma
; CHECK-NEXT: vfwadd.vv v16, v8, v8, v0.t
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: call func@plt
; CHECK-NEXT: li a0, 32
; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma
-; CHECK-NEXT: vrgather.vv v4, v8, v8, v0.t
+; CHECK-NEXT: vrgather.vv v16, v8, v8, v0.t
; CHECK-NEXT: vsetvli zero, s0, e16, m4, ta, ma
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 3
-; CHECK-NEXT: add a1, sp, a1
-; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: addi a1, sp, 16
; CHECK-NEXT: csrr a2, vlenb
; CHECK-NEXT: slli a2, a2, 2
-; CHECK-NEXT: vl4r.v v24, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vl4r.v v20, (a1) # Unknown-size Folded Reload
; CHECK-NEXT: add a1, a1, a2
-; CHECK-NEXT: vl4r.v v28, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vl4r.v v24, (a1) # Unknown-size Folded Reload
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 4
+; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
-; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
-; CHECK-NEXT: vfwsub.wv v8, v16, v24
-; CHECK-NEXT: addi a1, sp, 16
-; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vfwsub.wv v8, v0, v20
; CHECK-NEXT: vsetvli zero, a0, e16, m4, tu, mu
-; CHECK-NEXT: vssubu.vv v4, v4, v8, v0.t
+; CHECK-NEXT: vssubu.vv v16, v16, v8, v0.t
; CHECK-NEXT: vsetvli zero, s0, e32, m8, tu, mu
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vfdiv.vv v8, v16, v8, v0.t
; CHECK-NEXT: vse32.v v8, (a0)
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: li a1, 24
-; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
; CHECK-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
diff --git a/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll b/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll
index 7698f860589aaf1..f41a2a06c69bf44 100644
--- a/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll
@@ -2348,16 +2348,14 @@ define <vscale x 7 x i64> @vp_bitreverse_nxv7i64(<vscale x 7 x i64> %va, <vscale
; RV32-NEXT: li a3, 40
; RV32-NEXT: vsll.vx v24, v24, a3, v0.t
; RV32-NEXT: vor.vv v16, v16, v24, v0.t
-; RV32-NEXT: addi a4, sp, 16
+; RV32-NEXT: csrr a4, vlenb
+; RV32-NEXT: slli a4, a4, 4
+; RV32-NEXT: add a4, sp, a4
+; RV32-NEXT: addi a4, a4, 16
; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
; RV32-NEXT: lui a4, 4080
; RV32-NEXT: vand.vx v16, v8, a4, v0.t
-; RV32-NEXT: vsll.vi v16, v16, 24, v0.t
-; RV32-NEXT: csrr a5, vlenb
-; RV32-NEXT: slli a5, a5, 4
-; RV32-NEXT: add a5, sp, a5
-; RV32-NEXT: addi a5, a5, 16
-; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill
+; RV32-NEXT: vsll.vi v24, v16, 24, v0.t
; RV32-NEXT: addi a5, sp, 8
; RV32-NEXT: vsetvli a6, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a5), zero
@@ -2369,15 +2367,13 @@ define <vscale x 7 x i64> @vp_bitreverse_nxv7i64(<vscale x 7 x i64> %va, <vscale
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vand.vv v16, v8, v16, v0.t
; RV32-NEXT: vsll.vi v16, v16, 8, v0.t
+; RV32-NEXT: vor.vv v16, v24, v16, v0.t
; RV32-NEXT: csrr a5, vlenb
; RV32-NEXT: slli a5, a5, 4
; RV32-NEXT: add a5, sp, a5
; RV32-NEXT: addi a5, a5, 16
; RV32-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload
; RV32-NEXT: vor.vv v16, v24, v16, v0.t
-; RV32-NEXT: addi a5, sp, 16
-; RV32-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload
-; RV32-NEXT: vor.vv v16, v24, v16, v0.t
; RV32-NEXT: csrr a5, vlenb
; RV32-NEXT: slli a5, a5, 4
; RV32-NEXT: add a5, sp, a5
@@ -2712,16 +2708,14 @@ define <vscale x 8 x i64> @vp_bitreverse_nxv8i64(<vscale x 8 x i64> %va, <vscale
; RV32-NEXT: li a3, 40
; RV32-NEXT: vsll.vx v24, v24, a3, v0.t
; RV32-NEXT: vor.vv v16, v16, v24, v0.t
-; RV32-NEXT: addi a4, sp, 16
+; RV32-NEXT: csrr a4, vlenb
+; RV32-NEXT: slli a4, a4, 4
+; RV32-NEXT: add a4, sp, a4
+; RV32-NEXT: addi a4, a4, 16
; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
; RV32-NEXT: lui a4, 4080
; RV32-NEXT: vand.vx v16, v8, a4, v0.t
-; RV32-NEXT: vsll.vi v16, v16, 24, v0.t
-; RV32-NEXT: csrr a5, vlenb
-; RV32-NEXT: slli a5, a5, 4
-; RV32-NEXT: add a5, sp, a5
-; RV32-NEXT: addi a5, a5, 16
-; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill
+; RV32-NEXT: vsll.vi v24, v16, 24, v0.t
; RV32-NEXT: addi a5, sp, 8
; RV32-NEXT: vsetvli a6, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a5), zero
@@ -2733,15 +2727,13 @@ define <vscale x 8 x i64> @vp_bitreverse_nxv8i64(<vscale x 8 x i64> %va, <vscale
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vand.vv v16, v8, v16, v0.t
; RV32-NEXT: vsll.vi v16, v16, 8, v0.t
+; RV32-NEXT: vor.vv v16, v24, v16, v0.t
; RV32-NEXT: csrr a5, vlenb
; RV32-NEXT: slli a5, a5, 4
; RV32-NEXT: add a5, sp, a5
; RV32-NEXT: addi a5, a5, 16
; RV32-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload
; RV32-NEXT: vor.vv v16, v24, v16, v0.t
-; RV32-NEXT: addi a5, sp, 16
-; RV32-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload
-; RV32-NEXT: vor.vv v16, v24, v16, v0.t
; RV32-NEXT: csrr a5, vlenb
; RV32-NEXT: slli a5, a5, 4
; RV32-NEXT: add a5, sp, a5
diff --git a/llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll b/llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll
index 6aac13a0bcbb873..9952e49116bfcc1 100644
--- a/llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll
@@ -1068,16 +1068,14 @@ define <vscale x 7 x i64> @vp_bswap_nxv7i64(<vscale x 7 x i64> %va, <vscale x 7
; RV32-NEXT: li a3, 40
; RV32-NEXT: vsll.vx v24, v24, a3, v0.t
; RV32-NEXT: vor.vv v16, v16, v24, v0.t
-; RV32-NEXT: addi a4, sp, 16
+; RV32-NEXT: csrr a4, vlenb
+; RV32-NEXT: slli a4, a4, 4
+; RV32-NEXT: add a4, sp, a4
+; RV32-NEXT: addi a4, a4, 16
; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
; RV32-NEXT: lui a4, 4080
; RV32-NEXT: vand.vx v16, v8, a4, v0.t
-; RV32-NEXT: vsll.vi v16, v16, 24, v0.t
-; RV32-NEXT: csrr a5, vlenb
-; RV32-NEXT: slli a5, a5, 4
-; RV32-NEXT: add a5, sp, a5
-; RV32-NEXT: addi a5, a5, 16
-; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill
+; RV32-NEXT: vsll.vi v24, v16, 24, v0.t
; RV32-NEXT: addi a5, sp, 8
; RV32-NEXT: vsetvli a6, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a5), zero
@@ -1089,15 +1087,13 @@ define <vscale x 7 x i64> @vp_bswap_nxv7i64(<vscale x 7 x i64> %va, <vscale x 7
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vand.vv v16, v8, v16, v0.t
; RV32-NEXT: vsll.vi v16, v16, 8, v0.t
+; RV32-NEXT: vor.vv v16, v24, v16, v0.t
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 4
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 16
; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
; RV32-NEXT: vor.vv v16, v24, v16, v0.t
-; RV32-NEXT: addi a0, sp, 16
-; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
-; RV32-NEXT: vor.vv v16, v24, v16, v0.t
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 4
; RV32-NEXT: add a0, sp, a0
@@ -1317,16 +1313,14 @@ define <vscale x 8 x i64> @vp_bswap_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8
; RV32-NEXT: li a3, 40
; RV32-NEXT: vsll.vx v24, v24, a3, v0.t
; RV32-NEXT: vor.vv v16, v16, v24, v0.t
-; RV32-NEXT: addi a4, sp, 16
+; RV32-NEXT: csrr a4, vlenb
+; RV32-NEXT: slli a4, a4, 4
+; RV32-NEXT: add a4, sp, a4
+; RV32-NEXT: addi a4, a4, 16
; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
; RV32-NEXT: lui a4, 4080
; RV32-NEXT: vand.vx v16, v8, a4, v0.t
-; RV32-NEXT: vsll.vi v16, v16, 24, v0.t
-; RV32-NEXT: csrr a5, vlenb
-; RV32-NEXT: slli a5, a5, 4
-; RV32-NEXT: add a5, sp, a5
-; RV32-NEXT: addi a5, a5, 16
-; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill
+; RV32-NEXT: vsll.vi v24, v16, 24, v0.t
; RV32-NEXT: addi a5, sp, 8
; RV32-NEXT: vsetvli a6, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a5), zero
@@ -1338,15 +1332,13 @@ define <vscale x 8 x i64> @vp_bswap_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vand.vv v16, v8, v16, v0.t
; RV32-NEXT: vsll.vi v16, v16, 8, v0.t
+; RV32-NEXT: vor.vv v16, v24, v16, v0.t
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 4
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 16
; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
; RV32-NEXT: vor.vv v16, v24, v16, v0.t
-; RV32-NEXT: addi a0, sp, 16
-; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
-; RV32-NEXT: vor.vv v16, v24, v16, v0.t
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 4
; RV32-NEXT: add a0, sp, a0
diff --git a/llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll b/llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll
index 9767ba4bbc3b7a6..d8689ecf05a5e3b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll
@@ -2070,7 +2070,8 @@ define <vscale x 16 x i64> @vp_ctpop_nxv16i64(<vscale x 16 x i64> %va, <vscale x
; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x38, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 56 * vlenb
; RV32-NEXT: vmv1r.v v24, v0
; RV32-NEXT: csrr a1, vlenb
-; RV32-NEXT: slli a1, a1, 5
+; RV32-NEXT: li a2, 40
+; RV32-NEXT: mul a1, a1, a2
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
@@ -2089,35 +2090,30 @@ define <vscale x 16 x i64> @vp_ctpop_nxv16i64(<vscale x 16 x i64> %va, <vscale x
; RV32-NEXT: addi a3, a3, -1
; RV32-NEXT: and a2, a3, a2
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
-; RV32-NEXT: vsrl.vi v8, v16, 1, v0.t
; RV32-NEXT: csrr a3, vlenb
; RV32-NEXT: li a4, 40
; RV32-NEXT: mul a3, a3, a4
; RV32-NEXT: add a3, sp, a3
; RV32-NEXT: addi a3, a3, 16
-; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload
+; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t
; RV32-NEXT: lui a3, 349525
; RV32-NEXT: addi a3, a3, 1365
; RV32-NEXT: vsetvli a4, zero, e32, m8, ta, ma
-; RV32-NEXT: vmv.v.x v16, a3
+; RV32-NEXT: vmv.v.x v8, a3
; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma
; RV32-NEXT: csrr a3, vlenb
; RV32-NEXT: li a4, 24
; RV32-NEXT: mul a3, a3, a4
; RV32-NEXT: add a3, sp, a3
; RV32-NEXT: addi a3, a3, 16
-; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: vand.vv v8, v16, v8, v0.t
; RV32-NEXT: csrr a3, vlenb
; RV32-NEXT: li a4, 40
; RV32-NEXT: mul a3, a3, a4
; RV32-NEXT: add a3, sp, a3
; RV32-NEXT: addi a3, a3, 16
-; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload
-; RV32-NEXT: vand.vv v8, v8, v16, v0.t
-; RV32-NEXT: csrr a3, vlenb
-; RV32-NEXT: slli a3, a3, 5
-; RV32-NEXT: add a3, sp, a3
-; RV32-NEXT: addi a3, a3, 16
; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload
; RV32-NEXT: vsub.vv v8, v16, v8, v0.t
; RV32-NEXT: csrr a3, vlenb
diff --git a/llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll b/llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll
index 1dda8aa458d9069..dbd2a1fc6eac31f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll
@@ -2439,13 +2439,13 @@ define <vscale x 16 x i64> @vp_cttz_nxv16i64(<vscale x 16 x i64> %va, <vscale x
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 16
; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
-; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t
+; RV32-NEXT: vsrl.vi v8, v8, 1, v0.t
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: li a1, 40
; RV32-NEXT: mul a0, a0, a1
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 16
-; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
+; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: li a1, 24
; RV32-NEXT: mul a0, a0, a1
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll
index 22f92fe48e22e71..91bf3e981e0a6c4 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll
@@ -1710,16 +1710,14 @@ define <15 x i64> @vp_bitreverse_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroex
; RV32-NEXT: li a3, 40
; RV32-NEXT: vsll.vx v24, v24, a3, v0.t
; RV32-NEXT: vor.vv v16, v16, v24, v0.t
-; RV32-NEXT: addi a4, sp, 48
+; RV32-NEXT: csrr a4, vlenb
+; RV32-NEXT: slli a4, a4, 4
+; RV32-NEXT: add a4, sp, a4
+; RV32-NEXT: addi a4, a4, 48
; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
; RV32-NEXT: lui a4, 4080
; RV32-NEXT: vand.vx v16, v8, a4, v0.t
-; RV32-NEXT: vsll.vi v16, v16, 24, v0.t
-; RV32-NEXT: csrr a5, vlenb
-; RV32-NEXT: slli a5, a5, 4
-; RV32-NEXT: add a5, sp, a5
-; RV32-NEXT: addi a5, a5, 48
-; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill
+; RV32-NEXT: vsll.vi v24, v16, 24, v0.t
; RV32-NEXT: addi a5, sp, 16
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a5), zero
@@ -1731,15 +1729,13 @@ define <15 x i64> @vp_bitreverse_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroex
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vand.vv v16, v8, v16, v0.t
; RV32-NEXT: vsll.vi v16, v16, 8, v0.t
+; RV32-NEXT: vor.vv v16, v24, v16, v0.t
; RV32-NEXT: csrr a5, vlenb
; RV32-NEXT: slli a5, a5, 4
; RV32-NEXT: add a5, sp, a5
; RV32-NEXT: addi a5, a5, 48
; RV32-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload
; RV32-NEXT: vor.vv v16, v24, v16, v0.t
-; RV32-NEXT: addi a5, sp, 48
-; RV32-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload
-; RV32-NEXT: vor.vv v16, v24, v16, v0.t
; RV32-NEXT: csrr a5, vlenb
; RV32-NEXT: slli a5, a5, 4
; RV32-NEXT: add a5, sp, a5
@@ -2080,16 +2076,14 @@ define <16 x i64> @vp_bitreverse_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroex
; RV32-NEXT: li a3, 40
; RV32-NEXT: vsll.vx v24, v24, a3, v0.t
; RV32-NEXT: vor.vv v16, v16, v24, v0.t
-; RV32-NEXT: addi a4, sp, 48
+; RV32-NEXT: csrr a4, vlenb
+; RV32-NEXT: slli a4, a4, 4
+; RV32-NEXT: add a4, sp, a4
+; RV32-NEXT: addi a4, a4, 48
; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
; RV32-NEXT: lui a4, 4080
; RV32-NEXT: vand.vx v16, v8, a4, v0.t
-; RV32-NEXT: vsll.vi v16, v16, 24, v0.t
-; RV32-NEXT: csrr a5, vlenb
-; RV32-NEXT: slli a5, a5, 4
-; RV32-NEXT: add a5, sp, a5
-; RV32-NEXT: addi a5, a5, 48
-; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill
+; RV32-NEXT: vsll.vi v24, v16, 24, v0.t
; RV32-NEXT: addi a5, sp, 16
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a5), zero
@@ -2101,15 +2095,13 @@ define <16 x i64> @vp_bitreverse_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroex
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vand.vv v16, v8, v16, v0.t
; RV32-NEXT: vsll.vi v16, v16, 8, v0.t
+; RV32-NEXT: vor.vv v16, v24, v16, v0.t
; RV32-NEXT: csrr a5, vlenb
; RV32-NEXT: slli a5, a5, 4
; RV32-NEXT: add a5, sp, a5
; RV32-NEXT: addi a5, a5, 48
; RV32-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload
; RV32-NEXT: vor.vv v16, v24, v16, v0.t
-; RV32-NEXT: addi a5, sp, 48
-; RV32-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload
-; RV32-NEXT: vor.vv v16, v24, v16, v0.t
; RV32-NEXT: csrr a5, vlenb
; RV32-NEXT: slli a5, a5, 4
; RV32-NEXT: add a5, sp, a5
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll
index 22061040ddbc1c2..6308f73e219da10 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll
@@ -796,16 +796,14 @@ define <15 x i64> @vp_bswap_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %ev
; RV32-NEXT: li a3, 40
; RV32-NEXT: vsll.vx v24, v24, a3, v0.t
; RV32-NEXT: vor.vv v16, v16, v24, v0.t
-; RV32-NEXT: addi a4, sp, 16
+; RV32-NEXT: csrr a4, vlenb
+; RV32-NEXT: slli a4, a4, 4
+; RV32-NEXT: add a4, sp, a4
+; RV32-NEXT: addi a4, a4, 16
; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
; RV32-NEXT: lui a4, 4080
; RV32-NEXT: vand.vx v16, v8, a4, v0.t
-; RV32-NEXT: vsll.vi v16, v16, 24, v0.t
-; RV32-NEXT: csrr a5, vlenb
-; RV32-NEXT: slli a5, a5, 4
-; RV32-NEXT: add a5, sp, a5
-; RV32-NEXT: addi a5, a5, 16
-; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill
+; RV32-NEXT: vsll.vi v24, v16, 24, v0.t
; RV32-NEXT: addi a5, sp, 8
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a5), zero
@@ -817,15 +815,13 @@ define <15 x i64> @vp_bswap_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %ev
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vand.vv v16, v8, v16, v0.t
; RV32-NEXT: vsll.vi v16, v16, 8, v0.t
+; RV32-NEXT: vor.vv v16, v24, v16, v0.t
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 4
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 16
; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
; RV32-NEXT: vor.vv v16, v24, v16, v0.t
-; RV32-NEXT: addi a0, sp, 16
-; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
-; RV32-NEXT: vor.vv v16, v24, v16, v0.t
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 4
; RV32-NEXT: add a0, sp, a0
@@ -1033,16 +1029,14 @@ define <16 x i64> @vp_bswap_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %ev
; RV32-NEXT: li a3, 40
; RV32-NEXT: vsll.vx v24, v24, a3, v0.t
; RV32-NEXT: vor.vv v16, v16, v24, v0.t
-; RV32-NEXT: addi a4, sp, 16
+; RV32-NEXT: csrr a4, vlenb
+; RV32-NEXT: slli a4, a4, 4
+; RV32-NEXT: add a4, sp, a4
+; RV32-NEXT: addi a4, a4, 16
; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
; RV32-NEXT: lui a4, 4080
; RV32-NEXT: vand.vx v16, v8, a4, v0.t
-; RV32-NEXT: vsll.vi v16, v16, 24, v0.t
-; RV32-NEXT: csrr a5, vlenb
-; RV32-NEXT: slli a5, a5, 4
-; RV32-NEXT: add a5, sp, a5
-; RV32-NEXT: addi a5, a5, 16
-; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill
+; RV32-NEXT: vsll.vi v24, v16, 24, v0.t
; RV32-NEXT: addi a5, sp, 8
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a5), zero
@@ -1054,15 +1048,13 @@ define <16 x i64> @vp_bswap_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %ev
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vand.vv v16, v8, v16, v0.t
; RV32-NEXT: vsll.vi v16, v16, 8, v0.t
+; RV32-NEXT: vor.vv v16, v24, v16, v0.t
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 4
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 16
; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
; RV32-NEXT: vor.vv v16, v24, v16, v0.t
-; RV32-NEXT: addi a0, sp, 16
-; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
-; RV32-NEXT: vor.vv v16, v24, v16, v0.t
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 4
; RV32-NEXT: add a0, sp, a0
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz-vp.ll
index d47971ef5a13ca6..37e6c35196c6a4b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz-vp.ll
@@ -2203,12 +2203,12 @@ define <32 x i64> @vp_ctlz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl
; RV32-NEXT: vnot.v v8, v8, v0.t
; RV32-NEXT: addi a0, sp, 48
; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
-; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t
+; RV32-NEXT: vsrl.vi v8, v8, 1, v0.t
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 5
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 48
-; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
+; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: li a1, 24
; RV32-NEXT: mul a0, a0, a1
@@ -4792,12 +4792,12 @@ define <32 x i64> @vp_ctlz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z
; RV32-NEXT: vnot.v v8, v8, v0.t
; RV32-NEXT: addi a0, sp, 48
; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
-; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t
+; RV32-NEXT: vsrl.vi v8, v8, 1, v0.t
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 5
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 48
-; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
+; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: li a1, 24
; RV32-NEXT: mul a0, a0, a1
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll
index 345e4180bba31a6..082ac1871e94096 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll
@@ -1863,12 +1863,12 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl
; RV32-NEXT: vand.vv v8, v16, v8, v0.t
; RV32-NEXT: addi a0, sp, 48
; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
-; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t
+; RV32-NEXT: vsrl.vi v8, v8, 1, v0.t
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 5
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 48
-; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
+; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: li a1, 24
; RV32-NEXT: mul a0, a0, a1
@@ -4052,12 +4052,12 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z
; RV32-NEXT: vand.vv v8, v16, v8, v0.t
; RV32-NEXT: addi a0, sp, 48
; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
-; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t
+; RV32-NEXT: vsrl.vi v8, v8, 1, v0.t
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 5
; RV32-NEXT: add a0, sp, a0
; RV32-NEXT: addi a0, a0, 48
-; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
+; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: li a1, 24
; RV32-NEXT: mul a0, a0, a1
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum.ll
index dee94ecedff68dd..d0ba28fc30f4ee2 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum.ll
@@ -264,24 +264,13 @@ declare <16 x double> @llvm.maximum.v16f64(<16 x double>, <16 x double>)
define <16 x double> @vfmax_v16f64_vv(<16 x double> %a, <16 x double> %b) nounwind {
; CHECK-LABEL: vfmax_v16f64_vv:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 3
-; CHECK-NEXT: sub sp, sp, a0
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT: vmfeq.vv v0, v8, v8
; CHECK-NEXT: vmfeq.vv v1, v16, v16
; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0
-; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v0, v1
; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0
-; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: vfmax.vv v8, v8, v16
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 3
-; CHECK-NEXT: add sp, sp, a0
-; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: vfmax.vv v8, v8, v24
; CHECK-NEXT: ret
%v = call <16 x double> @llvm.maximum.v16f64(<16 x double> %a, <16 x double> %b)
ret <16 x double> %v
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum.ll
index a201dcc47853b83..10e972963d4e491 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum.ll
@@ -264,24 +264,13 @@ declare <16 x double> @llvm.minimum.v16f64(<16 x double>, <16 x double>)
define <16 x double> @vfmin_v16f64_vv(<16 x double> %a, <16 x double> %b) nounwind {
; CHECK-LABEL: vfmin_v16f64_vv:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 3
-; CHECK-NEXT: sub sp, sp, a0
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT: vmfeq.vv v0, v8, v8
; CHECK-NEXT: vmfeq.vv v1, v16, v16
; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0
-; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v0, v1
; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0
-; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: vfmin.vv v8, v8, v16
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 3
-; CHECK-NEXT: add sp, sp, a0
-; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: vfmin.vv v8, v8, v24
; CHECK-NEXT: ret
%v = call <16 x double> @llvm.minimum.v16f64(<16 x double> %a, <16 x double> %b)
ret <16 x double> %v
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-vp.ll
index 8a0d8e1791b0f60..34b0789d801a3c2 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-vp.ll
@@ -230,20 +230,20 @@ define <128 x i32> @vtrunc_v128i32_v128i64(<128 x i64> %a, <128 x i1> %m, i32 ze
; CHECK-NEXT: slli a2, a2, 6
; CHECK-NEXT: sub sp, sp, a2
; CHECK-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0xc0, 0x00, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 64 * vlenb
-; CHECK-NEXT: vmv1r.v v1, v0
+; CHECK-NEXT: vmv1r.v v4, v0
; CHECK-NEXT: csrr a2, vlenb
-; CHECK-NEXT: li a3, 24
-; CHECK-NEXT: mul a2, a2, a3
+; CHECK-NEXT: slli a2, a2, 5
; CHECK-NEXT: add a2, sp, a2
; CHECK-NEXT: addi a2, a2, 16
; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: csrr a2, vlenb
-; CHECK-NEXT: slli a2, a2, 5
+; CHECK-NEXT: li a3, 40
+; CHECK-NEXT: mul a2, a2, a3
; CHECK-NEXT: add a2, sp, a2
; CHECK-NEXT: addi a2, a2, 16
; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma
-; CHECK-NEXT: vslidedown.vi v3, v0, 8
+; CHECK-NEXT: vslidedown.vi v1, v0, 8
; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
; CHECK-NEXT: vslidedown.vi v26, v0, 4
; CHECK-NEXT: addi a2, a1, 512
@@ -256,7 +256,7 @@ define <128 x i32> @vtrunc_v128i32_v128i64(<128 x i64> %a, <128 x i1> %m, i32 ze
; CHECK-NEXT: addi a2, a2, 16
; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma
-; CHECK-NEXT: vslidedown.vi v27, v3, 4
+; CHECK-NEXT: vslidedown.vi v27, v1, 4
; CHECK-NEXT: addi a2, a1, 640
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT: vle64.v v16, (a2)
@@ -277,7 +277,8 @@ define <128 x i32> @vtrunc_v128i32_v128i64(<128 x i64> %a, <128 x i1> %m, i32 ze
; CHECK-NEXT: vsetvli zero, a2, e32, m4, ta, ma
; CHECK-NEXT: vnsrl.wi v8, v16, 0, v0.t
; CHECK-NEXT: csrr a2, vlenb
-; CHECK-NEXT: slli a2, a2, 4
+; CHECK-NEXT: li a5, 24
+; CHECK-NEXT: mul a2, a2, a5
; CHECK-NEXT: add a2, sp, a2
; CHECK-NEXT: addi a2, a2, 16
; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill
@@ -318,8 +319,7 @@ define <128 x i32> @vtrunc_v128i32_v128i64(<128 x i64> %a, <128 x i1> %m, i32 ze
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT: vle64.v v8, (a1)
; CHECK-NEXT: csrr a6, vlenb
-; CHECK-NEXT: li t0, 40
-; CHECK-NEXT: mul a6, a6, t0
+; CHECK-NEXT: slli a6, a6, 3
; CHECK-NEXT: add a6, sp, a6
; CHECK-NEXT: addi a6, a6, 16
; CHECK-NEXT: vs8r.v v8, (a6) # Unknown-size Folded Spill
@@ -337,7 +337,7 @@ define <128 x i32> @vtrunc_v128i32_v128i64(<128 x i64> %a, <128 x i1> %m, i32 ze
; CHECK-NEXT: vl8r.v v16, (t0) # Unknown-size Folded Reload
; CHECK-NEXT: vnsrl.wi v8, v16, 0, v0.t
; CHECK-NEXT: csrr t0, vlenb
-; CHECK-NEXT: slli t0, t0, 3
+; CHECK-NEXT: slli t0, t0, 4
; CHECK-NEXT: add t0, sp, t0
; CHECK-NEXT: addi t0, t0, 16
; CHECK-NEXT: vs8r.v v8, (t0) # Unknown-size Folded Spill
@@ -346,15 +346,14 @@ define <128 x i32> @vtrunc_v128i32_v128i64(<128 x i64> %a, <128 x i1> %m, i32 ze
; CHECK-NEXT: li a6, 16
; CHECK-NEXT: .LBB16_6:
; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; CHECK-NEXT: vslidedown.vi v20, v3, 2
+; CHECK-NEXT: vslidedown.vi v20, v1, 2
; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; CHECK-NEXT: vle64.v v8, (a5)
; CHECK-NEXT: addi a1, a1, 256
; CHECK-NEXT: vsetvli zero, a6, e32, m4, ta, ma
; CHECK-NEXT: vmv1r.v v0, v26
; CHECK-NEXT: csrr a5, vlenb
-; CHECK-NEXT: li a6, 40
-; CHECK-NEXT: mul a5, a5, a6
+; CHECK-NEXT: slli a5, a5, 3
; CHECK-NEXT: add a5, sp, a5
; CHECK-NEXT: addi a5, a5, 16
; CHECK-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload
@@ -383,23 +382,19 @@ define <128 x i32> @vtrunc_v128i32_v128i64(<128 x i64> %a, <128 x i1> %m, i32 ze
; CHECK-NEXT: li a4, 16
; CHECK-NEXT: .LBB16_10:
; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; CHECK-NEXT: vslidedown.vi v2, v1, 2
+; CHECK-NEXT: vslidedown.vi v5, v4, 2
; CHECK-NEXT: vsetvli zero, a4, e32, m4, ta, ma
-; CHECK-NEXT: vmv1r.v v0, v3
+; CHECK-NEXT: vmv1r.v v0, v1
; CHECK-NEXT: vnsrl.wi v8, v24, 0, v0.t
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: li a4, 40
-; CHECK-NEXT: mul a1, a1, a4
-; CHECK-NEXT: add a1, sp, a1
-; CHECK-NEXT: addi a1, a1, 16
-; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: vmv.v.v v0, v8
; CHECK-NEXT: bltu a7, a3, .LBB16_12
; CHECK-NEXT: # %bb.11:
; CHECK-NEXT: li a7, 32
; CHECK-NEXT: .LBB16_12:
; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 4
+; CHECK-NEXT: li a4, 24
+; CHECK-NEXT: mul a1, a1, a4
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
@@ -418,7 +413,7 @@ define <128 x i32> @vtrunc_v128i32_v128i64(<128 x i64> %a, <128 x i1> %m, i32 ze
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: slli a1, a1, 4
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
@@ -436,15 +431,10 @@ define <128 x i32> @vtrunc_v128i32_v128i64(<128 x i64> %a, <128 x i1> %m, i32 ze
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: li a4, 40
-; CHECK-NEXT: mul a1, a1, a4
-; CHECK-NEXT: add a1, sp, a1
-; CHECK-NEXT: addi a1, a1, 16
-; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vmv4r.v v8, v0
; CHECK-NEXT: vslideup.vi v8, v16, 16
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: li a4, 40
+; CHECK-NEXT: li a4, 24
; CHECK-NEXT: mul a1, a1, a4
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
@@ -454,10 +444,9 @@ define <128 x i32> @vtrunc_v128i32_v128i64(<128 x i64> %a, <128 x i1> %m, i32 ze
; CHECK-NEXT: addi a4, a4, -1
; CHECK-NEXT: and a1, a4, a1
; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT: vmv1r.v v0, v2
+; CHECK-NEXT: vmv1r.v v0, v5
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: li a4, 24
-; CHECK-NEXT: mul a1, a1, a4
+; CHECK-NEXT: slli a1, a1, 5
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
@@ -467,9 +456,10 @@ define <128 x i32> @vtrunc_v128i32_v128i64(<128 x i64> %a, <128 x i1> %m, i32 ze
; CHECK-NEXT: li a7, 16
; CHECK-NEXT: .LBB16_14:
; CHECK-NEXT: vsetvli zero, a7, e32, m4, ta, ma
-; CHECK-NEXT: vmv1r.v v0, v1
+; CHECK-NEXT: vmv1r.v v0, v4
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 5
+; CHECK-NEXT: li a2, 40
+; CHECK-NEXT: mul a1, a1, a2
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
@@ -479,7 +469,7 @@ define <128 x i32> @vtrunc_v128i32_v128i64(<128 x i64> %a, <128 x i1> %m, i32 ze
; CHECK-NEXT: vse32.v v16, (a0)
; CHECK-NEXT: addi a1, a0, 256
; CHECK-NEXT: csrr a2, vlenb
-; CHECK-NEXT: li a3, 40
+; CHECK-NEXT: li a3, 24
; CHECK-NEXT: mul a2, a2, a3
; CHECK-NEXT: add a2, sp, a2
; CHECK-NEXT: addi a2, a2, 16
diff --git a/llvm/test/CodeGen/RISCV/rvv/fmaximum-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fmaximum-sdnode.ll
index c53fa714157d5c5..c954c9a6d0d1137 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fmaximum-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fmaximum-sdnode.ll
@@ -199,34 +199,21 @@ declare <vscale x 32 x half> @llvm.maximum.nxv32f16(<vscale x 32 x half>, <vscal
define <vscale x 32 x half> @vfmax_nxv32f16_vv(<vscale x 32 x half> %a, <vscale x 32 x half> %b) nounwind {
; ZVFH-LABEL: vfmax_nxv32f16_vv:
; ZVFH: # %bb.0:
-; ZVFH-NEXT: addi sp, sp, -16
-; ZVFH-NEXT: csrr a0, vlenb
-; ZVFH-NEXT: slli a0, a0, 3
-; ZVFH-NEXT: sub sp, sp, a0
; ZVFH-NEXT: vsetvli a0, zero, e16, m8, ta, ma
; ZVFH-NEXT: vmfeq.vv v0, v8, v8
; ZVFH-NEXT: vmfeq.vv v1, v16, v16
; ZVFH-NEXT: vmerge.vvm v24, v8, v16, v0
-; ZVFH-NEXT: addi a0, sp, 16
-; ZVFH-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
; ZVFH-NEXT: vmv1r.v v0, v1
; ZVFH-NEXT: vmerge.vvm v8, v16, v8, v0
-; ZVFH-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; ZVFH-NEXT: vfmax.vv v8, v8, v16
-; ZVFH-NEXT: csrr a0, vlenb
-; ZVFH-NEXT: slli a0, a0, 3
-; ZVFH-NEXT: add sp, sp, a0
-; ZVFH-NEXT: addi sp, sp, 16
+; ZVFH-NEXT: vfmax.vv v8, v8, v24
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vfmax_nxv32f16_vv:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: addi sp, sp, -16
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: li a1, 24
-; ZVFHMIN-NEXT: mul a0, a0, a1
+; ZVFHMIN-NEXT: slli a0, a0, 4
; ZVFHMIN-NEXT: sub sp, sp, a0
-; ZVFHMIN-NEXT: vmv8r.v v24, v16
; ZVFHMIN-NEXT: csrr a0, vlenb
; ZVFHMIN-NEXT: slli a0, a0, 3
; ZVFHMIN-NEXT: add a0, sp, a0
@@ -234,24 +221,14 @@ define <vscale x 32 x half> @vfmax_nxv32f16_vv(<vscale x 32 x half> %a, <vscale
; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: vmv8r.v v0, v8
; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v0
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8
-; ZVFHMIN-NEXT: vmfeq.vv v1, v16, v16
-; ZVFHMIN-NEXT: vmerge.vvm v24, v8, v16, v0
-; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: slli a0, a0, 4
-; ZVFHMIN-NEXT: add a0, sp, a0
-; ZVFHMIN-NEXT: addi a0, a0, 16
-; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vmfeq.vv v1, v24, v24
+; ZVFHMIN-NEXT: vmerge.vvm v16, v8, v24, v0
; ZVFHMIN-NEXT: vmv1r.v v0, v1
-; ZVFHMIN-NEXT: vmerge.vvm v8, v16, v8, v0
-; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: slli a0, a0, 4
-; ZVFHMIN-NEXT: add a0, sp, a0
-; ZVFHMIN-NEXT: addi a0, a0, 16
-; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vmerge.vvm v8, v24, v8, v0
; ZVFHMIN-NEXT: vfmax.vv v24, v8, v16
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24
@@ -269,25 +246,24 @@ define <vscale x 32 x half> @vfmax_nxv32f16_vv(<vscale x 32 x half> %a, <vscale
; ZVFHMIN-NEXT: vmfeq.vv v1, v8, v8
; ZVFHMIN-NEXT: vmerge.vvm v24, v16, v8, v0
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: slli a0, a0, 4
+; ZVFHMIN-NEXT: slli a0, a0, 3
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: vmv1r.v v0, v1
; ZVFHMIN-NEXT: vmerge.vvm v16, v8, v16, v0
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: slli a0, a0, 4
+; ZVFHMIN-NEXT: slli a0, a0, 3
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
-; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT: vfmax.vv v16, v16, v8
+; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vfmax.vv v16, v16, v24
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: addi a0, sp, 16
; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: li a1, 24
-; ZVFHMIN-NEXT: mul a0, a0, a1
+; ZVFHMIN-NEXT: slli a0, a0, 4
; ZVFHMIN-NEXT: add sp, sp, a0
; ZVFHMIN-NEXT: addi sp, sp, 16
; ZVFHMIN-NEXT: ret
@@ -368,24 +344,13 @@ declare <vscale x 16 x float> @llvm.maximum.nxv16f32(<vscale x 16 x float>, <vsc
define <vscale x 16 x float> @vfmax_nxv16f32_vv(<vscale x 16 x float> %a, <vscale x 16 x float> %b) nounwind {
; CHECK-LABEL: vfmax_nxv16f32_vv:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 3
-; CHECK-NEXT: sub sp, sp, a0
; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma
; CHECK-NEXT: vmfeq.vv v0, v8, v8
; CHECK-NEXT: vmfeq.vv v1, v16, v16
; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0
-; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v0, v1
; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0
-; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: vfmax.vv v8, v8, v16
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 3
-; CHECK-NEXT: add sp, sp, a0
-; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: vfmax.vv v8, v8, v24
; CHECK-NEXT: ret
%v = call <vscale x 16 x float> @llvm.maximum.nxv16f32(<vscale x 16 x float> %a, <vscale x 16 x float> %b)
ret <vscale x 16 x float> %v
@@ -447,24 +412,13 @@ declare <vscale x 8 x double> @llvm.maximum.nxv8f64(<vscale x 8 x double>, <vsca
define <vscale x 8 x double> @vfmax_nxv8f64_vv(<vscale x 8 x double> %a, <vscale x 8 x double> %b) nounwind {
; CHECK-LABEL: vfmax_nxv8f64_vv:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 3
-; CHECK-NEXT: sub sp, sp, a0
; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma
; CHECK-NEXT: vmfeq.vv v0, v8, v8
; CHECK-NEXT: vmfeq.vv v1, v16, v16
; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0
-; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v0, v1
; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0
-; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: vfmax.vv v8, v8, v16
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 3
-; CHECK-NEXT: add sp, sp, a0
-; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: vfmax.vv v8, v8, v24
; CHECK-NEXT: ret
%v = call <vscale x 8 x double> @llvm.maximum.nxv8f64(<vscale x 8 x double> %a, <vscale x 8 x double> %b)
ret <vscale x 8 x double> %v
diff --git a/llvm/test/CodeGen/RISCV/rvv/fminimum-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fminimum-sdnode.ll
index b386792cd3688c8..567068fdfb1c47a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fminimum-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fminimum-sdnode.ll
@@ -199,34 +199,21 @@ declare <vscale x 32 x half> @llvm.minimum.nxv32f16(<vscale x 32 x half>, <vscal
define <vscale x 32 x half> @vfmin_nxv32f16_vv(<vscale x 32 x half> %a, <vscale x 32 x half> %b) nounwind {
; ZVFH-LABEL: vfmin_nxv32f16_vv:
; ZVFH: # %bb.0:
-; ZVFH-NEXT: addi sp, sp, -16
-; ZVFH-NEXT: csrr a0, vlenb
-; ZVFH-NEXT: slli a0, a0, 3
-; ZVFH-NEXT: sub sp, sp, a0
; ZVFH-NEXT: vsetvli a0, zero, e16, m8, ta, ma
; ZVFH-NEXT: vmfeq.vv v0, v8, v8
; ZVFH-NEXT: vmfeq.vv v1, v16, v16
; ZVFH-NEXT: vmerge.vvm v24, v8, v16, v0
-; ZVFH-NEXT: addi a0, sp, 16
-; ZVFH-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
; ZVFH-NEXT: vmv1r.v v0, v1
; ZVFH-NEXT: vmerge.vvm v8, v16, v8, v0
-; ZVFH-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; ZVFH-NEXT: vfmin.vv v8, v8, v16
-; ZVFH-NEXT: csrr a0, vlenb
-; ZVFH-NEXT: slli a0, a0, 3
-; ZVFH-NEXT: add sp, sp, a0
-; ZVFH-NEXT: addi sp, sp, 16
+; ZVFH-NEXT: vfmin.vv v8, v8, v24
; ZVFH-NEXT: ret
;
; ZVFHMIN-LABEL: vfmin_nxv32f16_vv:
; ZVFHMIN: # %bb.0:
; ZVFHMIN-NEXT: addi sp, sp, -16
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: li a1, 24
-; ZVFHMIN-NEXT: mul a0, a0, a1
+; ZVFHMIN-NEXT: slli a0, a0, 4
; ZVFHMIN-NEXT: sub sp, sp, a0
-; ZVFHMIN-NEXT: vmv8r.v v24, v16
; ZVFHMIN-NEXT: csrr a0, vlenb
; ZVFHMIN-NEXT: slli a0, a0, 3
; ZVFHMIN-NEXT: add a0, sp, a0
@@ -234,24 +221,14 @@ define <vscale x 32 x half> @vfmin_nxv32f16_vv(<vscale x 32 x half> %a, <vscale
; ZVFHMIN-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: vmv8r.v v0, v8
; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma
-; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24
+; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16
; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v0
; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma
; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8
-; ZVFHMIN-NEXT: vmfeq.vv v1, v16, v16
-; ZVFHMIN-NEXT: vmerge.vvm v24, v8, v16, v0
-; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: slli a0, a0, 4
-; ZVFHMIN-NEXT: add a0, sp, a0
-; ZVFHMIN-NEXT: addi a0, a0, 16
-; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
+; ZVFHMIN-NEXT: vmfeq.vv v1, v24, v24
+; ZVFHMIN-NEXT: vmerge.vvm v16, v8, v24, v0
; ZVFHMIN-NEXT: vmv1r.v v0, v1
-; ZVFHMIN-NEXT: vmerge.vvm v8, v16, v8, v0
-; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: slli a0, a0, 4
-; ZVFHMIN-NEXT: add a0, sp, a0
-; ZVFHMIN-NEXT: addi a0, a0, 16
-; ZVFHMIN-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vmerge.vvm v8, v24, v8, v0
; ZVFHMIN-NEXT: vfmin.vv v24, v8, v16
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v24
@@ -269,25 +246,24 @@ define <vscale x 32 x half> @vfmin_nxv32f16_vv(<vscale x 32 x half> %a, <vscale
; ZVFHMIN-NEXT: vmfeq.vv v1, v8, v8
; ZVFHMIN-NEXT: vmerge.vvm v24, v16, v8, v0
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: slli a0, a0, 4
+; ZVFHMIN-NEXT: slli a0, a0, 3
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
; ZVFHMIN-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
; ZVFHMIN-NEXT: vmv1r.v v0, v1
; ZVFHMIN-NEXT: vmerge.vvm v16, v8, v16, v0
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: slli a0, a0, 4
+; ZVFHMIN-NEXT: slli a0, a0, 3
; ZVFHMIN-NEXT: add a0, sp, a0
; ZVFHMIN-NEXT: addi a0, a0, 16
-; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
-; ZVFHMIN-NEXT: vfmin.vv v16, v16, v8
+; ZVFHMIN-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
+; ZVFHMIN-NEXT: vfmin.vv v16, v16, v24
; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; ZVFHMIN-NEXT: addi a0, sp, 16
; ZVFHMIN-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v16
; ZVFHMIN-NEXT: csrr a0, vlenb
-; ZVFHMIN-NEXT: li a1, 24
-; ZVFHMIN-NEXT: mul a0, a0, a1
+; ZVFHMIN-NEXT: slli a0, a0, 4
; ZVFHMIN-NEXT: add sp, sp, a0
; ZVFHMIN-NEXT: addi sp, sp, 16
; ZVFHMIN-NEXT: ret
@@ -368,24 +344,13 @@ declare <vscale x 16 x float> @llvm.minimum.nxv16f32(<vscale x 16 x float>, <vsc
define <vscale x 16 x float> @vfmin_nxv16f32_vv(<vscale x 16 x float> %a, <vscale x 16 x float> %b) nounwind {
; CHECK-LABEL: vfmin_nxv16f32_vv:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 3
-; CHECK-NEXT: sub sp, sp, a0
; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma
; CHECK-NEXT: vmfeq.vv v0, v8, v8
; CHECK-NEXT: vmfeq.vv v1, v16, v16
; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0
-; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v0, v1
; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0
-; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: vfmin.vv v8, v8, v16
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 3
-; CHECK-NEXT: add sp, sp, a0
-; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: vfmin.vv v8, v8, v24
; CHECK-NEXT: ret
%v = call <vscale x 16 x float> @llvm.minimum.nxv16f32(<vscale x 16 x float> %a, <vscale x 16 x float> %b)
ret <vscale x 16 x float> %v
@@ -447,24 +412,13 @@ declare <vscale x 8 x double> @llvm.minimum.nxv8f64(<vscale x 8 x double>, <vsca
define <vscale x 8 x double> @vfmin_nxv8f64_vv(<vscale x 8 x double> %a, <vscale x 8 x double> %b) nounwind {
; CHECK-LABEL: vfmin_nxv8f64_vv:
; CHECK: # %bb.0:
-; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 3
-; CHECK-NEXT: sub sp, sp, a0
; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma
; CHECK-NEXT: vmfeq.vv v0, v8, v8
; CHECK-NEXT: vmfeq.vv v1, v16, v16
; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0
-; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: vmv1r.v v0, v1
; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0
-; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: vfmin.vv v8, v8, v16
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 3
-; CHECK-NEXT: add sp, sp, a0
-; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: vfmin.vv v8, v8, v24
; CHECK-NEXT: ret
%v = call <vscale x 8 x double> @llvm.minimum.nxv8f64(<vscale x 8 x double> %a, <vscale x 8 x double> %b)
ret <vscale x 8 x double> %v
diff --git a/llvm/test/CodeGen/RISCV/rvv/fshr-fshl-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fshr-fshl-vp.ll
index d79d28d52e73c9b..d1218b583fced4d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fshr-fshl-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fshr-fshl-vp.ll
@@ -966,13 +966,13 @@ define <vscale x 16 x i64> @fshr_v16i64(<vscale x 16 x i64> %a, <vscale x 16 x i
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x38, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 56 * vlenb
; CHECK-NEXT: vmv1r.v v24, v0
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 5
+; CHECK-NEXT: li a3, 24
+; CHECK-NEXT: mul a1, a1, a3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: li a3, 40
-; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: slli a1, a1, 5
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
@@ -981,7 +981,7 @@ define <vscale x 16 x i64> @fshr_v16i64(<vscale x 16 x i64> %a, <vscale x 16 x i
; CHECK-NEXT: add a5, a0, a3
; CHECK-NEXT: vl8re64.v v8, (a5)
; CHECK-NEXT: csrr a5, vlenb
-; CHECK-NEXT: slli a5, a5, 4
+; CHECK-NEXT: slli a5, a5, 3
; CHECK-NEXT: add a5, sp, a5
; CHECK-NEXT: addi a5, a5, 16
; CHECK-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill
@@ -995,12 +995,15 @@ define <vscale x 16 x i64> @fshr_v16i64(<vscale x 16 x i64> %a, <vscale x 16 x i
; CHECK-NEXT: and a6, a6, a3
; CHECK-NEXT: li a3, 63
; CHECK-NEXT: vl8re64.v v8, (a5)
-; CHECK-NEXT: addi a5, sp, 16
+; CHECK-NEXT: csrr a5, vlenb
+; CHECK-NEXT: li a7, 40
+; CHECK-NEXT: mul a5, a5, a7
+; CHECK-NEXT: add a5, sp, a5
+; CHECK-NEXT: addi a5, a5, 16
; CHECK-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill
; CHECK-NEXT: vl8re64.v v16, (a0)
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: li a5, 24
-; CHECK-NEXT: mul a0, a0, a5
+; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
@@ -1012,47 +1015,53 @@ define <vscale x 16 x i64> @fshr_v16i64(<vscale x 16 x i64> %a, <vscale x 16 x i
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a6, e64, m8, ta, ma
-; CHECK-NEXT: vand.vx v16, v8, a3, v0.t
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: li a2, 40
+; CHECK-NEXT: mul a0, a0, a2
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vand.vx v8, v8, a3, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vsrl.vv v16, v16, v8, v0.t
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: vsrl.vv v16, v16, v8, v0.t
+; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: li a2, 40
+; CHECK-NEXT: mul a0, a0, a2
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
-; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vnot.v v8, v8, v0.t
; CHECK-NEXT: vand.vx v16, v8, a3, v0.t
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 5
+; CHECK-NEXT: li a2, 24
+; CHECK-NEXT: mul a0, a0, a2
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t
; CHECK-NEXT: vsll.vv v8, v8, v16, v0.t
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vor.vv v8, v8, v16, v0.t
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 5
+; CHECK-NEXT: li a2, 40
+; CHECK-NEXT: mul a0, a0, a2
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
@@ -1070,18 +1079,19 @@ define <vscale x 16 x i64> @fshr_v16i64(<vscale x 16 x i64> %a, <vscale x 16 x i
; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vand.vx v8, v8, a3, v0.t
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: li a1, 24
+; CHECK-NEXT: mul a0, a0, a1
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: li a1, 24
-; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: li a1, 24
+; CHECK-NEXT: mul a0, a0, a1
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
@@ -1101,8 +1111,7 @@ define <vscale x 16 x i64> @fshr_v16i64(<vscale x 16 x i64> %a, <vscale x 16 x i
; CHECK-NEXT: vnot.v v16, v8, v0.t
; CHECK-NEXT: vand.vx v16, v16, a3, v0.t
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: li a1, 40
-; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: slli a0, a0, 5
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
@@ -1116,7 +1125,8 @@ define <vscale x 16 x i64> @fshr_v16i64(<vscale x 16 x i64> %a, <vscale x 16 x i
; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vor.vv v8, v8, v16, v0.t
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 5
+; CHECK-NEXT: li a1, 40
+; CHECK-NEXT: mul a0, a0, a1
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
@@ -1143,13 +1153,12 @@ define <vscale x 16 x i64> @fshl_v16i64(<vscale x 16 x i64> %a, <vscale x 16 x i
; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x38, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 56 * vlenb
; CHECK-NEXT: vmv1r.v v24, v0
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 4
+; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: li a3, 40
-; CHECK-NEXT: mul a1, a1, a3
+; CHECK-NEXT: slli a1, a1, 5
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
@@ -1158,8 +1167,7 @@ define <vscale x 16 x i64> @fshl_v16i64(<vscale x 16 x i64> %a, <vscale x 16 x i
; CHECK-NEXT: add a5, a0, a3
; CHECK-NEXT: vl8re64.v v8, (a5)
; CHECK-NEXT: csrr a5, vlenb
-; CHECK-NEXT: li a6, 24
-; CHECK-NEXT: mul a5, a5, a6
+; CHECK-NEXT: slli a5, a5, 4
; CHECK-NEXT: add a5, sp, a5
; CHECK-NEXT: addi a5, a5, 16
; CHECK-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill
@@ -1173,11 +1181,16 @@ define <vscale x 16 x i64> @fshl_v16i64(<vscale x 16 x i64> %a, <vscale x 16 x i
; CHECK-NEXT: and a6, a6, a3
; CHECK-NEXT: li a3, 63
; CHECK-NEXT: vl8re64.v v8, (a5)
-; CHECK-NEXT: addi a5, sp, 16
+; CHECK-NEXT: csrr a5, vlenb
+; CHECK-NEXT: li a7, 40
+; CHECK-NEXT: mul a5, a5, a7
+; CHECK-NEXT: add a5, sp, a5
+; CHECK-NEXT: addi a5, a5, 16
; CHECK-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill
; CHECK-NEXT: vl8re64.v v16, (a0)
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 5
+; CHECK-NEXT: li a5, 24
+; CHECK-NEXT: mul a0, a0, a5
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
@@ -1189,48 +1202,51 @@ define <vscale x 16 x i64> @fshl_v16i64(<vscale x 16 x i64> %a, <vscale x 16 x i
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: vsetvli zero, a6, e64, m8, ta, ma
-; CHECK-NEXT: vand.vx v16, v8, a3, v0.t
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: li a2, 40
+; CHECK-NEXT: mul a0, a0, a2
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vand.vx v8, v8, a3, v0.t
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vsll.vv v16, v16, v8, v0.t
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: vsll.vv v16, v16, v8, v0.t
+; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: li a2, 40
+; CHECK-NEXT: mul a0, a0, a2
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
-; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vnot.v v8, v8, v0.t
; CHECK-NEXT: vand.vx v16, v8, a3, v0.t
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: li a2, 24
-; CHECK-NEXT: mul a0, a0, a2
+; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vsrl.vi v8, v8, 1, v0.t
; CHECK-NEXT: vsrl.vv v8, v8, v16, v0.t
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vor.vv v8, v16, v8, v0.t
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: li a2, 24
+; CHECK-NEXT: li a2, 40
; CHECK-NEXT: mul a0, a0, a2
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
@@ -1247,15 +1263,14 @@ define <vscale x 16 x i64> @fshl_v16i64(<vscale x 16 x i64> %a, <vscale x 16 x i
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: vand.vx v16, v8, a3, v0.t
+; CHECK-NEXT: vand.vx v8, v8, a3, v0.t
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: li a1, 40
-; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: slli a0, a0, 5
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
@@ -1266,8 +1281,7 @@ define <vscale x 16 x i64> @fshl_v16i64(<vscale x 16 x i64> %a, <vscale x 16 x i
; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vsll.vv v16, v16, v8, v0.t
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: li a1, 40
-; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: slli a0, a0, 5
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
@@ -1280,21 +1294,21 @@ define <vscale x 16 x i64> @fshl_v16i64(<vscale x 16 x i64> %a, <vscale x 16 x i
; CHECK-NEXT: vnot.v v16, v8, v0.t
; CHECK-NEXT: vand.vx v16, v16, a3, v0.t
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 5
+; CHECK-NEXT: li a1, 24
+; CHECK-NEXT: mul a0, a0, a1
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vsrl.vi v8, v8, 1, v0.t
; CHECK-NEXT: vsrl.vv v8, v8, v16, v0.t
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: li a1, 40
-; CHECK-NEXT: mul a0, a0, a1
+; CHECK-NEXT: slli a0, a0, 5
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vor.vv v8, v16, v8, v0.t
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: li a1, 24
+; CHECK-NEXT: li a1, 40
; CHECK-NEXT: mul a0, a0, a1
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16