[llvm] 7f8451c - [RISCV] Use vsetvli instead of vlenb in Prologue/Epilogue (#113756)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Mar 21 02:22:35 PDT 2025
Author: Kito Cheng
Date: 2025-03-21T17:22:32+08:00
New Revision: 7f8451c868cdc91481f9629517db3f53349514b7
URL: https://github.com/llvm/llvm-project/commit/7f8451c868cdc91481f9629517db3f53349514b7
DIFF: https://github.com/llvm/llvm-project/commit/7f8451c868cdc91481f9629517db3f53349514b7.diff
LOG: [RISCV] Use vsetvli instead of vlenb in Prologue/Epilogue (#113756)
Currently, we use `csrr` with `vlenb` to obtain the `VLEN`, but this is
not the only option. We can also use `vsetvli` with `e8`/`m1`, whose
result (`VLMAX`) equals the value read from `vlenb`. This method is
preferable on some microarchitectures, and it makes it easier to obtain
values like `VLEN * 2`, `VLEN * 4`, or `VLEN * 8` (via `m2`/`m4`/`m8`),
reducing the number of instructions needed to calculate VLEN multiples.
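For example, here is an illustrative sketch (register choice is
arbitrary) of computing `VLEN * 4` in bytes. Today this takes a CSR
read plus a shift:

  csrr a0, vlenb
  slli a0, a0, 2

whereas a single `vsetvli` with `m4` yields the same value directly,
as in the updated test output below:

  vsetvli a0, zero, e8, m4, ta, ma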
However, this approach is *NOT* always interchangeable with reading
`vlenb`, since it changes the state of `VTYPE` and `VL`. That can alter
the behavior of vector instructions and cause incorrect code generation
if it is applied after the vsetvli-insertion pass has run. Therefore, we
limit its use to the prologue/epilogue for now, as there are no vector
operations within the prologue/epilogue sequence.
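To illustrate the hazard (a hypothetical sequence, not taken from this
patch): if such a read were emitted between a `vsetvli` configuring a
loop and the vector instructions that depend on it, those instructions
would execute with the wrong state:

  vsetvli t0, a0, e32, m2, ta, ma   # configure VL/VTYPE for the loop
  vsetvli a1, zero, e8, m1, ta, ma  # VLEN read clobbers VL/VTYPE
  vadd.vv v8, v8, v12               # now runs with e8/m1 state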
With further analysis, we may extend this approach beyond the
prologue/epilogue in the future, but starting here should be a good
first step.
This behavior is guarded by the `+prefer-vsetvli-over-read-vlenb`
subtarget feature, which is disabled by default for now.
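As exercised by the test updates below, the feature can be enabled
explicitly on the `llc` command line, e.g.:

  llc -mtriple=riscv64 -mattr=+v,+prefer-vsetvli-over-read-vlenb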
Added:
Modified:
llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
llvm/lib/Target/RISCV/RISCVFeatures.td
llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
llvm/test/CodeGen/RISCV/rvv/allocate-lmul-2-4-8.ll
llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector-csr.ll
llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector.ll
llvm/test/CodeGen/RISCV/rvv/rv32-spill-zvlsseg.ll
llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector-csr.ll
llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector.ll
llvm/test/CodeGen/RISCV/rvv/rv64-spill-zvlsseg.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
index 0656bfbef6b35..4d26f77d4ed2c 100644
--- a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
@@ -56,6 +56,8 @@ class RISCVExpandPseudo : public MachineFunctionPass {
MachineBasicBlock::iterator MBBI);
bool expandRV32ZdinxLoad(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI);
+ bool expandPseudoReadVLENBViaVSETVLIX0(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI);
#ifndef NDEBUG
unsigned getInstSizeInBytes(const MachineFunction &MF) const {
unsigned Size = 0;
@@ -164,6 +166,8 @@ bool RISCVExpandPseudo::expandMI(MachineBasicBlock &MBB,
case RISCV::PseudoVMSET_M_B64:
// vmset.m vd => vmxnor.mm vd, vd, vd
return expandVMSET_VMCLR(MBB, MBBI, RISCV::VMXNOR_MM);
+ case RISCV::PseudoReadVLENBViaVSETVLIX0:
+ return expandPseudoReadVLENBViaVSETVLIX0(MBB, MBBI);
}
return false;
@@ -415,6 +419,24 @@ bool RISCVExpandPseudo::expandRV32ZdinxLoad(MachineBasicBlock &MBB,
return true;
}
+bool RISCVExpandPseudo::expandPseudoReadVLENBViaVSETVLIX0(
+ MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) {
+ DebugLoc DL = MBBI->getDebugLoc();
+ Register Dst = MBBI->getOperand(0).getReg();
+ unsigned Mul = MBBI->getOperand(1).getImm();
+ RISCVVType::VLMUL VLMUL = RISCVVType::encodeLMUL(Mul, /*Fractional=*/false);
+ unsigned VTypeImm = RISCVVType::encodeVTYPE(
+ VLMUL, /*SEW=*/8, /*TailAgnostic=*/true, /*MaskAgnostic=*/true);
+
+ BuildMI(MBB, MBBI, DL, TII->get(RISCV::PseudoVSETVLIX0))
+ .addReg(Dst, RegState::Define)
+ .addReg(RISCV::X0, RegState::Kill)
+ .addImm(VTypeImm);
+
+ MBBI->eraseFromParent();
+ return true;
+}
+
class RISCVPreRAExpandPseudo : public MachineFunctionPass {
public:
const RISCVSubtarget *STI;
diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td
index 12f32ad6fef76..0fcb0c5344282 100644
--- a/llvm/lib/Target/RISCV/RISCVFeatures.td
+++ b/llvm/lib/Target/RISCV/RISCVFeatures.td
@@ -1591,6 +1591,12 @@ def TuneVentanaVeyron : SubtargetFeature<"ventana-veyron", "RISCVProcFamily", "V
def TuneVXRMPipelineFlush : SubtargetFeature<"vxrm-pipeline-flush", "HasVXRMPipelineFlush",
"true", "VXRM writes causes pipeline flush">;
+def TunePreferVsetvliOverReadVLENB
+ : SubtargetFeature<"prefer-vsetvli-over-read-vlenb",
+ "PreferVsetvliOverReadVLENB",
+ "true",
+ "Prefer vsetvli over read vlenb CSR to calculate VLEN">;
+
// Assume that lock-free native-width atomics are available, even if the target
// and operating system combination would not usually provide them. The user
// is responsible for providing any necessary __sync implementations. Code
diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
index e1314d4fee8a0..fd7471599f35c 100644
--- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
@@ -2195,6 +2195,17 @@ bool RISCVFrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const {
const MachineFunction *MF = MBB.getParent();
const auto *RVFI = MF->getInfo<RISCVMachineFunctionInfo>();
+ // Make sure VTYPE and VL are not live-in since we will use vsetvli in the
+ // prologue to get the VLEN, and that will clobber these registers.
+ //
+ // We could also check whether the stack contains objects of scalable
+ // vector type, but that would require iterating over all stack objects,
+ // which may not be worthwhile since the situation is rare. We can add
+ // such a check in the future if it proves necessary.
+ if (STI.preferVsetvliOverReadVLENB() &&
+ (MBB.isLiveIn(RISCV::VTYPE) || MBB.isLiveIn(RISCV::VL)))
+ return false;
+
if (!RVFI->useSaveRestoreLibCalls(*MF))
return true;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
index 988a3f3266113..d0e0ed91af7d8 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -6051,6 +6051,11 @@ let hasSideEffects = 0, mayLoad = 0, mayStore = 0, isCodeGenOnly = 1 in {
[(set GPR:$rd, (riscv_read_vlenb))]>,
PseudoInstExpansion<(CSRRS GPR:$rd, SysRegVLENB.Encoding, X0)>,
Sched<[WriteRdVLENB]>;
+ let Defs = [VL, VTYPE] in {
+ def PseudoReadVLENBViaVSETVLIX0 : Pseudo<(outs GPR:$rd), (ins uimm5:$shamt),
+ []>,
+ Sched<[WriteVSETVLI, ReadVSETVLI]>;
+ }
}
let hasSideEffects = 0, mayLoad = 0, mayStore = 0, isCodeGenOnly = 1,
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
index 2fb21e92de0d9..e75e21319eae3 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
@@ -226,21 +226,48 @@ void RISCVRegisterInfo::adjustReg(MachineBasicBlock &MBB,
assert(isInt<32>(ScalableValue / (RISCV::RVVBitsPerBlock / 8)) &&
"Expect the number of vector registers within 32-bits.");
uint32_t NumOfVReg = ScalableValue / (RISCV::RVVBitsPerBlock / 8);
- BuildMI(MBB, II, DL, TII->get(RISCV::PseudoReadVLENB), ScratchReg)
- .setMIFlag(Flag);
-
- if (ScalableAdjOpc == RISCV::ADD && ST.hasStdExtZba() &&
- (NumOfVReg == 2 || NumOfVReg == 4 || NumOfVReg == 8)) {
- unsigned Opc = NumOfVReg == 2 ? RISCV::SH1ADD :
- (NumOfVReg == 4 ? RISCV::SH2ADD : RISCV::SH3ADD);
- BuildMI(MBB, II, DL, TII->get(Opc), DestReg)
- .addReg(ScratchReg, RegState::Kill).addReg(SrcReg)
+ // Only use vsetvli rather than vlenb when adjusting in the prologue or
+ // epilogue; otherwise it would disturb the VTYPE and VL state.
+ bool IsPrologueOrEpilogue =
+ Flag == MachineInstr::FrameSetup || Flag == MachineInstr::FrameDestroy;
+ bool UseVsetvliRatherThanVlenb =
+ IsPrologueOrEpilogue && ST.preferVsetvliOverReadVLENB();
+ if (UseVsetvliRatherThanVlenb && (NumOfVReg == 1 || NumOfVReg == 2 ||
+ NumOfVReg == 4 || NumOfVReg == 8)) {
+ BuildMI(MBB, II, DL, TII->get(RISCV::PseudoReadVLENBViaVSETVLIX0),
+ ScratchReg)
+ .addImm(NumOfVReg)
.setMIFlag(Flag);
- } else {
- TII->mulImm(MF, MBB, II, DL, ScratchReg, NumOfVReg, Flag);
BuildMI(MBB, II, DL, TII->get(ScalableAdjOpc), DestReg)
- .addReg(SrcReg).addReg(ScratchReg, RegState::Kill)
+ .addReg(SrcReg)
+ .addReg(ScratchReg, RegState::Kill)
.setMIFlag(Flag);
+ } else {
+ if (UseVsetvliRatherThanVlenb)
+ BuildMI(MBB, II, DL, TII->get(RISCV::PseudoReadVLENBViaVSETVLIX0),
+ ScratchReg)
+ .addImm(1)
+ .setMIFlag(Flag);
+ else
+ BuildMI(MBB, II, DL, TII->get(RISCV::PseudoReadVLENB), ScratchReg)
+ .setMIFlag(Flag);
+
+ if (ScalableAdjOpc == RISCV::ADD && ST.hasStdExtZba() &&
+ (NumOfVReg == 2 || NumOfVReg == 4 || NumOfVReg == 8)) {
+ unsigned Opc = NumOfVReg == 2
+ ? RISCV::SH1ADD
+ : (NumOfVReg == 4 ? RISCV::SH2ADD : RISCV::SH3ADD);
+ BuildMI(MBB, II, DL, TII->get(Opc), DestReg)
+ .addReg(ScratchReg, RegState::Kill)
+ .addReg(SrcReg)
+ .setMIFlag(Flag);
+ } else {
+ TII->mulImm(MF, MBB, II, DL, ScratchReg, NumOfVReg, Flag);
+ BuildMI(MBB, II, DL, TII->get(ScalableAdjOpc), DestReg)
+ .addReg(SrcReg)
+ .addReg(ScratchReg, RegState::Kill)
+ .setMIFlag(Flag);
+ }
}
SrcReg = DestReg;
KillSrcReg = true;
diff --git a/llvm/test/CodeGen/RISCV/rvv/allocate-lmul-2-4-8.ll b/llvm/test/CodeGen/RISCV/rvv/allocate-lmul-2-4-8.ll
index 7fe6bd24a2552..cba507874a32e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/allocate-lmul-2-4-8.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/allocate-lmul-2-4-8.ll
@@ -5,6 +5,12 @@
; RUN: | FileCheck %s --check-prefixes=CHECK,ZBA
; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s \
; RUN: | FileCheck %s --check-prefixes=CHECK,NOMUL
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+prefer-vsetvli-over-read-vlenb -verify-machineinstrs < %s \
+; RUN: | FileCheck %s --check-prefixes=CHECK-NOZBA-VSETVLI
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+zba,+prefer-vsetvli-over-read-vlenb -verify-machineinstrs < %s \
+; RUN: | FileCheck %s --check-prefixes=CHECK-ZBA-VSETVLI
+; RUN: llc -mtriple=riscv64 -mattr=+v,+prefer-vsetvli-over-read-vlenb -verify-machineinstrs < %s \
+; RUN: | FileCheck %s --check-prefixes=CHECK-NOMUL-VSETVLI
define void @lmul1() nounwind {
; CHECK-LABEL: lmul1:
@@ -14,6 +20,30 @@ define void @lmul1() nounwind {
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: ret
+;
+; CHECK-NOZBA-VSETVLI-LABEL: lmul1:
+; CHECK-NOZBA-VSETVLI: # %bb.0:
+; CHECK-NOZBA-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; CHECK-NOZBA-VSETVLI-NEXT: sub sp, sp, a0
+; CHECK-NOZBA-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; CHECK-NOZBA-VSETVLI-NEXT: add sp, sp, a0
+; CHECK-NOZBA-VSETVLI-NEXT: ret
+;
+; CHECK-ZBA-VSETVLI-LABEL: lmul1:
+; CHECK-ZBA-VSETVLI: # %bb.0:
+; CHECK-ZBA-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; CHECK-ZBA-VSETVLI-NEXT: sub sp, sp, a0
+; CHECK-ZBA-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; CHECK-ZBA-VSETVLI-NEXT: add sp, sp, a0
+; CHECK-ZBA-VSETVLI-NEXT: ret
+;
+; CHECK-NOMUL-VSETVLI-LABEL: lmul1:
+; CHECK-NOMUL-VSETVLI: # %bb.0:
+; CHECK-NOMUL-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; CHECK-NOMUL-VSETVLI-NEXT: sub sp, sp, a0
+; CHECK-NOMUL-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; CHECK-NOMUL-VSETVLI-NEXT: add sp, sp, a0
+; CHECK-NOMUL-VSETVLI-NEXT: ret
%v = alloca <vscale x 1 x i64>
ret void
}
@@ -47,6 +77,30 @@ define void @lmul2() nounwind {
; NOMUL-NEXT: slli a0, a0, 1
; NOMUL-NEXT: add sp, sp, a0
; NOMUL-NEXT: ret
+;
+; CHECK-NOZBA-VSETVLI-LABEL: lmul2:
+; CHECK-NOZBA-VSETVLI: # %bb.0:
+; CHECK-NOZBA-VSETVLI-NEXT: vsetvli a0, zero, e8, m2, ta, ma
+; CHECK-NOZBA-VSETVLI-NEXT: sub sp, sp, a0
+; CHECK-NOZBA-VSETVLI-NEXT: vsetvli a0, zero, e8, m2, ta, ma
+; CHECK-NOZBA-VSETVLI-NEXT: add sp, sp, a0
+; CHECK-NOZBA-VSETVLI-NEXT: ret
+;
+; CHECK-ZBA-VSETVLI-LABEL: lmul2:
+; CHECK-ZBA-VSETVLI: # %bb.0:
+; CHECK-ZBA-VSETVLI-NEXT: vsetvli a0, zero, e8, m2, ta, ma
+; CHECK-ZBA-VSETVLI-NEXT: sub sp, sp, a0
+; CHECK-ZBA-VSETVLI-NEXT: vsetvli a0, zero, e8, m2, ta, ma
+; CHECK-ZBA-VSETVLI-NEXT: add sp, sp, a0
+; CHECK-ZBA-VSETVLI-NEXT: ret
+;
+; CHECK-NOMUL-VSETVLI-LABEL: lmul2:
+; CHECK-NOMUL-VSETVLI: # %bb.0:
+; CHECK-NOMUL-VSETVLI-NEXT: vsetvli a0, zero, e8, m2, ta, ma
+; CHECK-NOMUL-VSETVLI-NEXT: sub sp, sp, a0
+; CHECK-NOMUL-VSETVLI-NEXT: vsetvli a0, zero, e8, m2, ta, ma
+; CHECK-NOMUL-VSETVLI-NEXT: add sp, sp, a0
+; CHECK-NOMUL-VSETVLI-NEXT: ret
%v = alloca <vscale x 2 x i64>
ret void
}
@@ -67,6 +121,51 @@ define void @lmul4() nounwind {
; CHECK-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; CHECK-NEXT: addi sp, sp, 48
; CHECK-NEXT: ret
+;
+; CHECK-NOZBA-VSETVLI-LABEL: lmul4:
+; CHECK-NOZBA-VSETVLI: # %bb.0:
+; CHECK-NOZBA-VSETVLI-NEXT: addi sp, sp, -48
+; CHECK-NOZBA-VSETVLI-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; CHECK-NOZBA-VSETVLI-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; CHECK-NOZBA-VSETVLI-NEXT: addi s0, sp, 48
+; CHECK-NOZBA-VSETVLI-NEXT: vsetvli a0, zero, e8, m4, ta, ma
+; CHECK-NOZBA-VSETVLI-NEXT: sub sp, sp, a0
+; CHECK-NOZBA-VSETVLI-NEXT: andi sp, sp, -32
+; CHECK-NOZBA-VSETVLI-NEXT: addi sp, s0, -48
+; CHECK-NOZBA-VSETVLI-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; CHECK-NOZBA-VSETVLI-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; CHECK-NOZBA-VSETVLI-NEXT: addi sp, sp, 48
+; CHECK-NOZBA-VSETVLI-NEXT: ret
+;
+; CHECK-ZBA-VSETVLI-LABEL: lmul4:
+; CHECK-ZBA-VSETVLI: # %bb.0:
+; CHECK-ZBA-VSETVLI-NEXT: addi sp, sp, -48
+; CHECK-ZBA-VSETVLI-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; CHECK-ZBA-VSETVLI-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; CHECK-ZBA-VSETVLI-NEXT: addi s0, sp, 48
+; CHECK-ZBA-VSETVLI-NEXT: vsetvli a0, zero, e8, m4, ta, ma
+; CHECK-ZBA-VSETVLI-NEXT: sub sp, sp, a0
+; CHECK-ZBA-VSETVLI-NEXT: andi sp, sp, -32
+; CHECK-ZBA-VSETVLI-NEXT: addi sp, s0, -48
+; CHECK-ZBA-VSETVLI-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; CHECK-ZBA-VSETVLI-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; CHECK-ZBA-VSETVLI-NEXT: addi sp, sp, 48
+; CHECK-ZBA-VSETVLI-NEXT: ret
+;
+; CHECK-NOMUL-VSETVLI-LABEL: lmul4:
+; CHECK-NOMUL-VSETVLI: # %bb.0:
+; CHECK-NOMUL-VSETVLI-NEXT: addi sp, sp, -48
+; CHECK-NOMUL-VSETVLI-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; CHECK-NOMUL-VSETVLI-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; CHECK-NOMUL-VSETVLI-NEXT: addi s0, sp, 48
+; CHECK-NOMUL-VSETVLI-NEXT: vsetvli a0, zero, e8, m4, ta, ma
+; CHECK-NOMUL-VSETVLI-NEXT: sub sp, sp, a0
+; CHECK-NOMUL-VSETVLI-NEXT: andi sp, sp, -32
+; CHECK-NOMUL-VSETVLI-NEXT: addi sp, s0, -48
+; CHECK-NOMUL-VSETVLI-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; CHECK-NOMUL-VSETVLI-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; CHECK-NOMUL-VSETVLI-NEXT: addi sp, sp, 48
+; CHECK-NOMUL-VSETVLI-NEXT: ret
%v = alloca <vscale x 4 x i64>
ret void
}
@@ -87,6 +186,51 @@ define void @lmul8() nounwind {
; CHECK-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
; CHECK-NEXT: addi sp, sp, 80
; CHECK-NEXT: ret
+;
+; CHECK-NOZBA-VSETVLI-LABEL: lmul8:
+; CHECK-NOZBA-VSETVLI: # %bb.0:
+; CHECK-NOZBA-VSETVLI-NEXT: addi sp, sp, -80
+; CHECK-NOZBA-VSETVLI-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
+; CHECK-NOZBA-VSETVLI-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
+; CHECK-NOZBA-VSETVLI-NEXT: addi s0, sp, 80
+; CHECK-NOZBA-VSETVLI-NEXT: vsetvli a0, zero, e8, m8, ta, ma
+; CHECK-NOZBA-VSETVLI-NEXT: sub sp, sp, a0
+; CHECK-NOZBA-VSETVLI-NEXT: andi sp, sp, -64
+; CHECK-NOZBA-VSETVLI-NEXT: addi sp, s0, -80
+; CHECK-NOZBA-VSETVLI-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
+; CHECK-NOZBA-VSETVLI-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
+; CHECK-NOZBA-VSETVLI-NEXT: addi sp, sp, 80
+; CHECK-NOZBA-VSETVLI-NEXT: ret
+;
+; CHECK-ZBA-VSETVLI-LABEL: lmul8:
+; CHECK-ZBA-VSETVLI: # %bb.0:
+; CHECK-ZBA-VSETVLI-NEXT: addi sp, sp, -80
+; CHECK-ZBA-VSETVLI-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
+; CHECK-ZBA-VSETVLI-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
+; CHECK-ZBA-VSETVLI-NEXT: addi s0, sp, 80
+; CHECK-ZBA-VSETVLI-NEXT: vsetvli a0, zero, e8, m8, ta, ma
+; CHECK-ZBA-VSETVLI-NEXT: sub sp, sp, a0
+; CHECK-ZBA-VSETVLI-NEXT: andi sp, sp, -64
+; CHECK-ZBA-VSETVLI-NEXT: addi sp, s0, -80
+; CHECK-ZBA-VSETVLI-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
+; CHECK-ZBA-VSETVLI-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
+; CHECK-ZBA-VSETVLI-NEXT: addi sp, sp, 80
+; CHECK-ZBA-VSETVLI-NEXT: ret
+;
+; CHECK-NOMUL-VSETVLI-LABEL: lmul8:
+; CHECK-NOMUL-VSETVLI: # %bb.0:
+; CHECK-NOMUL-VSETVLI-NEXT: addi sp, sp, -80
+; CHECK-NOMUL-VSETVLI-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
+; CHECK-NOMUL-VSETVLI-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
+; CHECK-NOMUL-VSETVLI-NEXT: addi s0, sp, 80
+; CHECK-NOMUL-VSETVLI-NEXT: vsetvli a0, zero, e8, m8, ta, ma
+; CHECK-NOMUL-VSETVLI-NEXT: sub sp, sp, a0
+; CHECK-NOMUL-VSETVLI-NEXT: andi sp, sp, -64
+; CHECK-NOMUL-VSETVLI-NEXT: addi sp, s0, -80
+; CHECK-NOMUL-VSETVLI-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
+; CHECK-NOMUL-VSETVLI-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
+; CHECK-NOMUL-VSETVLI-NEXT: addi sp, sp, 80
+; CHECK-NOMUL-VSETVLI-NEXT: ret
%v = alloca <vscale x 8 x i64>
ret void
}
@@ -120,6 +264,30 @@ define void @lmul1_and_2() nounwind {
; NOMUL-NEXT: slli a0, a0, 2
; NOMUL-NEXT: add sp, sp, a0
; NOMUL-NEXT: ret
+;
+; CHECK-NOZBA-VSETVLI-LABEL: lmul1_and_2:
+; CHECK-NOZBA-VSETVLI: # %bb.0:
+; CHECK-NOZBA-VSETVLI-NEXT: vsetvli a0, zero, e8, m4, ta, ma
+; CHECK-NOZBA-VSETVLI-NEXT: sub sp, sp, a0
+; CHECK-NOZBA-VSETVLI-NEXT: vsetvli a0, zero, e8, m4, ta, ma
+; CHECK-NOZBA-VSETVLI-NEXT: add sp, sp, a0
+; CHECK-NOZBA-VSETVLI-NEXT: ret
+;
+; CHECK-ZBA-VSETVLI-LABEL: lmul1_and_2:
+; CHECK-ZBA-VSETVLI: # %bb.0:
+; CHECK-ZBA-VSETVLI-NEXT: vsetvli a0, zero, e8, m4, ta, ma
+; CHECK-ZBA-VSETVLI-NEXT: sub sp, sp, a0
+; CHECK-ZBA-VSETVLI-NEXT: vsetvli a0, zero, e8, m4, ta, ma
+; CHECK-ZBA-VSETVLI-NEXT: add sp, sp, a0
+; CHECK-ZBA-VSETVLI-NEXT: ret
+;
+; CHECK-NOMUL-VSETVLI-LABEL: lmul1_and_2:
+; CHECK-NOMUL-VSETVLI: # %bb.0:
+; CHECK-NOMUL-VSETVLI-NEXT: vsetvli a0, zero, e8, m4, ta, ma
+; CHECK-NOMUL-VSETVLI-NEXT: sub sp, sp, a0
+; CHECK-NOMUL-VSETVLI-NEXT: vsetvli a0, zero, e8, m4, ta, ma
+; CHECK-NOMUL-VSETVLI-NEXT: add sp, sp, a0
+; CHECK-NOMUL-VSETVLI-NEXT: ret
%v1 = alloca <vscale x 1 x i64>
%v2 = alloca <vscale x 2 x i64>
ret void
@@ -141,6 +309,51 @@ define void @lmul2_and_4() nounwind {
; CHECK-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; CHECK-NEXT: addi sp, sp, 48
; CHECK-NEXT: ret
+;
+; CHECK-NOZBA-VSETVLI-LABEL: lmul2_and_4:
+; CHECK-NOZBA-VSETVLI: # %bb.0:
+; CHECK-NOZBA-VSETVLI-NEXT: addi sp, sp, -48
+; CHECK-NOZBA-VSETVLI-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; CHECK-NOZBA-VSETVLI-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; CHECK-NOZBA-VSETVLI-NEXT: addi s0, sp, 48
+; CHECK-NOZBA-VSETVLI-NEXT: vsetvli a0, zero, e8, m8, ta, ma
+; CHECK-NOZBA-VSETVLI-NEXT: sub sp, sp, a0
+; CHECK-NOZBA-VSETVLI-NEXT: andi sp, sp, -32
+; CHECK-NOZBA-VSETVLI-NEXT: addi sp, s0, -48
+; CHECK-NOZBA-VSETVLI-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; CHECK-NOZBA-VSETVLI-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; CHECK-NOZBA-VSETVLI-NEXT: addi sp, sp, 48
+; CHECK-NOZBA-VSETVLI-NEXT: ret
+;
+; CHECK-ZBA-VSETVLI-LABEL: lmul2_and_4:
+; CHECK-ZBA-VSETVLI: # %bb.0:
+; CHECK-ZBA-VSETVLI-NEXT: addi sp, sp, -48
+; CHECK-ZBA-VSETVLI-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; CHECK-ZBA-VSETVLI-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; CHECK-ZBA-VSETVLI-NEXT: addi s0, sp, 48
+; CHECK-ZBA-VSETVLI-NEXT: vsetvli a0, zero, e8, m8, ta, ma
+; CHECK-ZBA-VSETVLI-NEXT: sub sp, sp, a0
+; CHECK-ZBA-VSETVLI-NEXT: andi sp, sp, -32
+; CHECK-ZBA-VSETVLI-NEXT: addi sp, s0, -48
+; CHECK-ZBA-VSETVLI-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; CHECK-ZBA-VSETVLI-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; CHECK-ZBA-VSETVLI-NEXT: addi sp, sp, 48
+; CHECK-ZBA-VSETVLI-NEXT: ret
+;
+; CHECK-NOMUL-VSETVLI-LABEL: lmul2_and_4:
+; CHECK-NOMUL-VSETVLI: # %bb.0:
+; CHECK-NOMUL-VSETVLI-NEXT: addi sp, sp, -48
+; CHECK-NOMUL-VSETVLI-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; CHECK-NOMUL-VSETVLI-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; CHECK-NOMUL-VSETVLI-NEXT: addi s0, sp, 48
+; CHECK-NOMUL-VSETVLI-NEXT: vsetvli a0, zero, e8, m8, ta, ma
+; CHECK-NOMUL-VSETVLI-NEXT: sub sp, sp, a0
+; CHECK-NOMUL-VSETVLI-NEXT: andi sp, sp, -32
+; CHECK-NOMUL-VSETVLI-NEXT: addi sp, s0, -48
+; CHECK-NOMUL-VSETVLI-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; CHECK-NOMUL-VSETVLI-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; CHECK-NOMUL-VSETVLI-NEXT: addi sp, sp, 48
+; CHECK-NOMUL-VSETVLI-NEXT: ret
%v1 = alloca <vscale x 2 x i64>
%v2 = alloca <vscale x 4 x i64>
ret void
@@ -162,6 +375,51 @@ define void @lmul1_and_4() nounwind {
; CHECK-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; CHECK-NEXT: addi sp, sp, 48
; CHECK-NEXT: ret
+;
+; CHECK-NOZBA-VSETVLI-LABEL: lmul1_and_4:
+; CHECK-NOZBA-VSETVLI: # %bb.0:
+; CHECK-NOZBA-VSETVLI-NEXT: addi sp, sp, -48
+; CHECK-NOZBA-VSETVLI-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; CHECK-NOZBA-VSETVLI-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; CHECK-NOZBA-VSETVLI-NEXT: addi s0, sp, 48
+; CHECK-NOZBA-VSETVLI-NEXT: vsetvli a0, zero, e8, m8, ta, ma
+; CHECK-NOZBA-VSETVLI-NEXT: sub sp, sp, a0
+; CHECK-NOZBA-VSETVLI-NEXT: andi sp, sp, -32
+; CHECK-NOZBA-VSETVLI-NEXT: addi sp, s0, -48
+; CHECK-NOZBA-VSETVLI-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; CHECK-NOZBA-VSETVLI-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; CHECK-NOZBA-VSETVLI-NEXT: addi sp, sp, 48
+; CHECK-NOZBA-VSETVLI-NEXT: ret
+;
+; CHECK-ZBA-VSETVLI-LABEL: lmul1_and_4:
+; CHECK-ZBA-VSETVLI: # %bb.0:
+; CHECK-ZBA-VSETVLI-NEXT: addi sp, sp, -48
+; CHECK-ZBA-VSETVLI-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; CHECK-ZBA-VSETVLI-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; CHECK-ZBA-VSETVLI-NEXT: addi s0, sp, 48
+; CHECK-ZBA-VSETVLI-NEXT: vsetvli a0, zero, e8, m8, ta, ma
+; CHECK-ZBA-VSETVLI-NEXT: sub sp, sp, a0
+; CHECK-ZBA-VSETVLI-NEXT: andi sp, sp, -32
+; CHECK-ZBA-VSETVLI-NEXT: addi sp, s0, -48
+; CHECK-ZBA-VSETVLI-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; CHECK-ZBA-VSETVLI-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; CHECK-ZBA-VSETVLI-NEXT: addi sp, sp, 48
+; CHECK-ZBA-VSETVLI-NEXT: ret
+;
+; CHECK-NOMUL-VSETVLI-LABEL: lmul1_and_4:
+; CHECK-NOMUL-VSETVLI: # %bb.0:
+; CHECK-NOMUL-VSETVLI-NEXT: addi sp, sp, -48
+; CHECK-NOMUL-VSETVLI-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; CHECK-NOMUL-VSETVLI-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; CHECK-NOMUL-VSETVLI-NEXT: addi s0, sp, 48
+; CHECK-NOMUL-VSETVLI-NEXT: vsetvli a0, zero, e8, m8, ta, ma
+; CHECK-NOMUL-VSETVLI-NEXT: sub sp, sp, a0
+; CHECK-NOMUL-VSETVLI-NEXT: andi sp, sp, -32
+; CHECK-NOMUL-VSETVLI-NEXT: addi sp, s0, -48
+; CHECK-NOMUL-VSETVLI-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; CHECK-NOMUL-VSETVLI-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; CHECK-NOMUL-VSETVLI-NEXT: addi sp, sp, 48
+; CHECK-NOMUL-VSETVLI-NEXT: ret
%v1 = alloca <vscale x 1 x i64>
%v2 = alloca <vscale x 4 x i64>
ret void
@@ -201,6 +459,40 @@ define void @lmul2_and_1() nounwind {
; NOMUL-NEXT: add a0, a1, a0
; NOMUL-NEXT: add sp, sp, a0
; NOMUL-NEXT: ret
+;
+; CHECK-NOZBA-VSETVLI-LABEL: lmul2_and_1:
+; CHECK-NOZBA-VSETVLI: # %bb.0:
+; CHECK-NOZBA-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; CHECK-NOZBA-VSETVLI-NEXT: slli a1, a0, 1
+; CHECK-NOZBA-VSETVLI-NEXT: add a0, a1, a0
+; CHECK-NOZBA-VSETVLI-NEXT: sub sp, sp, a0
+; CHECK-NOZBA-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; CHECK-NOZBA-VSETVLI-NEXT: slli a1, a0, 1
+; CHECK-NOZBA-VSETVLI-NEXT: add a0, a1, a0
+; CHECK-NOZBA-VSETVLI-NEXT: add sp, sp, a0
+; CHECK-NOZBA-VSETVLI-NEXT: ret
+;
+; CHECK-ZBA-VSETVLI-LABEL: lmul2_and_1:
+; CHECK-ZBA-VSETVLI: # %bb.0:
+; CHECK-ZBA-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; CHECK-ZBA-VSETVLI-NEXT: sh1add a0, a0, a0
+; CHECK-ZBA-VSETVLI-NEXT: sub sp, sp, a0
+; CHECK-ZBA-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; CHECK-ZBA-VSETVLI-NEXT: sh1add a0, a0, a0
+; CHECK-ZBA-VSETVLI-NEXT: add sp, sp, a0
+; CHECK-ZBA-VSETVLI-NEXT: ret
+;
+; CHECK-NOMUL-VSETVLI-LABEL: lmul2_and_1:
+; CHECK-NOMUL-VSETVLI: # %bb.0:
+; CHECK-NOMUL-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; CHECK-NOMUL-VSETVLI-NEXT: slli a1, a0, 1
+; CHECK-NOMUL-VSETVLI-NEXT: add a0, a1, a0
+; CHECK-NOMUL-VSETVLI-NEXT: sub sp, sp, a0
+; CHECK-NOMUL-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; CHECK-NOMUL-VSETVLI-NEXT: slli a1, a0, 1
+; CHECK-NOMUL-VSETVLI-NEXT: add a0, a1, a0
+; CHECK-NOMUL-VSETVLI-NEXT: add sp, sp, a0
+; CHECK-NOMUL-VSETVLI-NEXT: ret
%v1 = alloca <vscale x 2 x i64>
%v2 = alloca <vscale x 1 x i64>
ret void
@@ -259,6 +551,59 @@ define void @lmul4_and_1() nounwind {
; NOMUL-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; NOMUL-NEXT: addi sp, sp, 48
; NOMUL-NEXT: ret
+;
+; CHECK-NOZBA-VSETVLI-LABEL: lmul4_and_1:
+; CHECK-NOZBA-VSETVLI: # %bb.0:
+; CHECK-NOZBA-VSETVLI-NEXT: addi sp, sp, -48
+; CHECK-NOZBA-VSETVLI-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; CHECK-NOZBA-VSETVLI-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; CHECK-NOZBA-VSETVLI-NEXT: addi s0, sp, 48
+; CHECK-NOZBA-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; CHECK-NOZBA-VSETVLI-NEXT: li a1, 6
+; CHECK-NOZBA-VSETVLI-NEXT: mul a0, a0, a1
+; CHECK-NOZBA-VSETVLI-NEXT: sub sp, sp, a0
+; CHECK-NOZBA-VSETVLI-NEXT: andi sp, sp, -32
+; CHECK-NOZBA-VSETVLI-NEXT: addi sp, s0, -48
+; CHECK-NOZBA-VSETVLI-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; CHECK-NOZBA-VSETVLI-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; CHECK-NOZBA-VSETVLI-NEXT: addi sp, sp, 48
+; CHECK-NOZBA-VSETVLI-NEXT: ret
+;
+; CHECK-ZBA-VSETVLI-LABEL: lmul4_and_1:
+; CHECK-ZBA-VSETVLI: # %bb.0:
+; CHECK-ZBA-VSETVLI-NEXT: addi sp, sp, -48
+; CHECK-ZBA-VSETVLI-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; CHECK-ZBA-VSETVLI-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; CHECK-ZBA-VSETVLI-NEXT: addi s0, sp, 48
+; CHECK-ZBA-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; CHECK-ZBA-VSETVLI-NEXT: slli a0, a0, 1
+; CHECK-ZBA-VSETVLI-NEXT: sh1add a0, a0, a0
+; CHECK-ZBA-VSETVLI-NEXT: sub sp, sp, a0
+; CHECK-ZBA-VSETVLI-NEXT: andi sp, sp, -32
+; CHECK-ZBA-VSETVLI-NEXT: addi sp, s0, -48
+; CHECK-ZBA-VSETVLI-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; CHECK-ZBA-VSETVLI-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; CHECK-ZBA-VSETVLI-NEXT: addi sp, sp, 48
+; CHECK-ZBA-VSETVLI-NEXT: ret
+;
+; CHECK-NOMUL-VSETVLI-LABEL: lmul4_and_1:
+; CHECK-NOMUL-VSETVLI: # %bb.0:
+; CHECK-NOMUL-VSETVLI-NEXT: addi sp, sp, -48
+; CHECK-NOMUL-VSETVLI-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; CHECK-NOMUL-VSETVLI-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; CHECK-NOMUL-VSETVLI-NEXT: addi s0, sp, 48
+; CHECK-NOMUL-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; CHECK-NOMUL-VSETVLI-NEXT: slli a0, a0, 1
+; CHECK-NOMUL-VSETVLI-NEXT: mv a1, a0
+; CHECK-NOMUL-VSETVLI-NEXT: slli a0, a0, 1
+; CHECK-NOMUL-VSETVLI-NEXT: add a0, a0, a1
+; CHECK-NOMUL-VSETVLI-NEXT: sub sp, sp, a0
+; CHECK-NOMUL-VSETVLI-NEXT: andi sp, sp, -32
+; CHECK-NOMUL-VSETVLI-NEXT: addi sp, s0, -48
+; CHECK-NOMUL-VSETVLI-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; CHECK-NOMUL-VSETVLI-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; CHECK-NOMUL-VSETVLI-NEXT: addi sp, sp, 48
+; CHECK-NOMUL-VSETVLI-NEXT: ret
%v1 = alloca <vscale x 4 x i64>
%v2 = alloca <vscale x 1 x i64>
ret void
@@ -317,6 +662,59 @@ define void @lmul4_and_2() nounwind {
; NOMUL-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; NOMUL-NEXT: addi sp, sp, 48
; NOMUL-NEXT: ret
+;
+; CHECK-NOZBA-VSETVLI-LABEL: lmul4_and_2:
+; CHECK-NOZBA-VSETVLI: # %bb.0:
+; CHECK-NOZBA-VSETVLI-NEXT: addi sp, sp, -48
+; CHECK-NOZBA-VSETVLI-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; CHECK-NOZBA-VSETVLI-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; CHECK-NOZBA-VSETVLI-NEXT: addi s0, sp, 48
+; CHECK-NOZBA-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; CHECK-NOZBA-VSETVLI-NEXT: li a1, 6
+; CHECK-NOZBA-VSETVLI-NEXT: mul a0, a0, a1
+; CHECK-NOZBA-VSETVLI-NEXT: sub sp, sp, a0
+; CHECK-NOZBA-VSETVLI-NEXT: andi sp, sp, -32
+; CHECK-NOZBA-VSETVLI-NEXT: addi sp, s0, -48
+; CHECK-NOZBA-VSETVLI-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; CHECK-NOZBA-VSETVLI-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; CHECK-NOZBA-VSETVLI-NEXT: addi sp, sp, 48
+; CHECK-NOZBA-VSETVLI-NEXT: ret
+;
+; CHECK-ZBA-VSETVLI-LABEL: lmul4_and_2:
+; CHECK-ZBA-VSETVLI: # %bb.0:
+; CHECK-ZBA-VSETVLI-NEXT: addi sp, sp, -48
+; CHECK-ZBA-VSETVLI-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; CHECK-ZBA-VSETVLI-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; CHECK-ZBA-VSETVLI-NEXT: addi s0, sp, 48
+; CHECK-ZBA-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; CHECK-ZBA-VSETVLI-NEXT: slli a0, a0, 1
+; CHECK-ZBA-VSETVLI-NEXT: sh1add a0, a0, a0
+; CHECK-ZBA-VSETVLI-NEXT: sub sp, sp, a0
+; CHECK-ZBA-VSETVLI-NEXT: andi sp, sp, -32
+; CHECK-ZBA-VSETVLI-NEXT: addi sp, s0, -48
+; CHECK-ZBA-VSETVLI-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; CHECK-ZBA-VSETVLI-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; CHECK-ZBA-VSETVLI-NEXT: addi sp, sp, 48
+; CHECK-ZBA-VSETVLI-NEXT: ret
+;
+; CHECK-NOMUL-VSETVLI-LABEL: lmul4_and_2:
+; CHECK-NOMUL-VSETVLI: # %bb.0:
+; CHECK-NOMUL-VSETVLI-NEXT: addi sp, sp, -48
+; CHECK-NOMUL-VSETVLI-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; CHECK-NOMUL-VSETVLI-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; CHECK-NOMUL-VSETVLI-NEXT: addi s0, sp, 48
+; CHECK-NOMUL-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; CHECK-NOMUL-VSETVLI-NEXT: slli a0, a0, 1
+; CHECK-NOMUL-VSETVLI-NEXT: mv a1, a0
+; CHECK-NOMUL-VSETVLI-NEXT: slli a0, a0, 1
+; CHECK-NOMUL-VSETVLI-NEXT: add a0, a0, a1
+; CHECK-NOMUL-VSETVLI-NEXT: sub sp, sp, a0
+; CHECK-NOMUL-VSETVLI-NEXT: andi sp, sp, -32
+; CHECK-NOMUL-VSETVLI-NEXT: addi sp, s0, -48
+; CHECK-NOMUL-VSETVLI-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; CHECK-NOMUL-VSETVLI-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; CHECK-NOMUL-VSETVLI-NEXT: addi sp, sp, 48
+; CHECK-NOMUL-VSETVLI-NEXT: ret
%v1 = alloca <vscale x 4 x i64>
%v2 = alloca <vscale x 2 x i64>
ret void
@@ -377,6 +775,61 @@ define void @lmul4_and_2_x2_0() nounwind {
; NOMUL-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; NOMUL-NEXT: addi sp, sp, 48
; NOMUL-NEXT: ret
+;
+; CHECK-NOZBA-VSETVLI-LABEL: lmul4_and_2_x2_0:
+; CHECK-NOZBA-VSETVLI: # %bb.0:
+; CHECK-NOZBA-VSETVLI-NEXT: addi sp, sp, -48
+; CHECK-NOZBA-VSETVLI-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; CHECK-NOZBA-VSETVLI-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; CHECK-NOZBA-VSETVLI-NEXT: addi s0, sp, 48
+; CHECK-NOZBA-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; CHECK-NOZBA-VSETVLI-NEXT: li a1, 14
+; CHECK-NOZBA-VSETVLI-NEXT: mul a0, a0, a1
+; CHECK-NOZBA-VSETVLI-NEXT: sub sp, sp, a0
+; CHECK-NOZBA-VSETVLI-NEXT: andi sp, sp, -32
+; CHECK-NOZBA-VSETVLI-NEXT: addi sp, s0, -48
+; CHECK-NOZBA-VSETVLI-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; CHECK-NOZBA-VSETVLI-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; CHECK-NOZBA-VSETVLI-NEXT: addi sp, sp, 48
+; CHECK-NOZBA-VSETVLI-NEXT: ret
+;
+; CHECK-ZBA-VSETVLI-LABEL: lmul4_and_2_x2_0:
+; CHECK-ZBA-VSETVLI: # %bb.0:
+; CHECK-ZBA-VSETVLI-NEXT: addi sp, sp, -48
+; CHECK-ZBA-VSETVLI-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; CHECK-ZBA-VSETVLI-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; CHECK-ZBA-VSETVLI-NEXT: addi s0, sp, 48
+; CHECK-ZBA-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; CHECK-ZBA-VSETVLI-NEXT: li a1, 14
+; CHECK-ZBA-VSETVLI-NEXT: mul a0, a0, a1
+; CHECK-ZBA-VSETVLI-NEXT: sub sp, sp, a0
+; CHECK-ZBA-VSETVLI-NEXT: andi sp, sp, -32
+; CHECK-ZBA-VSETVLI-NEXT: addi sp, s0, -48
+; CHECK-ZBA-VSETVLI-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; CHECK-ZBA-VSETVLI-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; CHECK-ZBA-VSETVLI-NEXT: addi sp, sp, 48
+; CHECK-ZBA-VSETVLI-NEXT: ret
+;
+; CHECK-NOMUL-VSETVLI-LABEL: lmul4_and_2_x2_0:
+; CHECK-NOMUL-VSETVLI: # %bb.0:
+; CHECK-NOMUL-VSETVLI-NEXT: addi sp, sp, -48
+; CHECK-NOMUL-VSETVLI-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; CHECK-NOMUL-VSETVLI-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; CHECK-NOMUL-VSETVLI-NEXT: addi s0, sp, 48
+; CHECK-NOMUL-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; CHECK-NOMUL-VSETVLI-NEXT: slli a0, a0, 1
+; CHECK-NOMUL-VSETVLI-NEXT: mv a1, a0
+; CHECK-NOMUL-VSETVLI-NEXT: slli a0, a0, 1
+; CHECK-NOMUL-VSETVLI-NEXT: add a1, a1, a0
+; CHECK-NOMUL-VSETVLI-NEXT: slli a0, a0, 1
+; CHECK-NOMUL-VSETVLI-NEXT: add a0, a0, a1
+; CHECK-NOMUL-VSETVLI-NEXT: sub sp, sp, a0
+; CHECK-NOMUL-VSETVLI-NEXT: andi sp, sp, -32
+; CHECK-NOMUL-VSETVLI-NEXT: addi sp, s0, -48
+; CHECK-NOMUL-VSETVLI-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; CHECK-NOMUL-VSETVLI-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; CHECK-NOMUL-VSETVLI-NEXT: addi sp, sp, 48
+; CHECK-NOMUL-VSETVLI-NEXT: ret
%v1 = alloca <vscale x 4 x i64>
%v2 = alloca <vscale x 2 x i64>
%v3 = alloca <vscale x 4 x i64>
@@ -437,6 +890,59 @@ define void @lmul4_and_2_x2_1() nounwind {
; NOMUL-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; NOMUL-NEXT: addi sp, sp, 48
; NOMUL-NEXT: ret
+;
+; CHECK-NOZBA-VSETVLI-LABEL: lmul4_and_2_x2_1:
+; CHECK-NOZBA-VSETVLI: # %bb.0:
+; CHECK-NOZBA-VSETVLI-NEXT: addi sp, sp, -48
+; CHECK-NOZBA-VSETVLI-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; CHECK-NOZBA-VSETVLI-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; CHECK-NOZBA-VSETVLI-NEXT: addi s0, sp, 48
+; CHECK-NOZBA-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; CHECK-NOZBA-VSETVLI-NEXT: li a1, 12
+; CHECK-NOZBA-VSETVLI-NEXT: mul a0, a0, a1
+; CHECK-NOZBA-VSETVLI-NEXT: sub sp, sp, a0
+; CHECK-NOZBA-VSETVLI-NEXT: andi sp, sp, -32
+; CHECK-NOZBA-VSETVLI-NEXT: addi sp, s0, -48
+; CHECK-NOZBA-VSETVLI-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; CHECK-NOZBA-VSETVLI-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; CHECK-NOZBA-VSETVLI-NEXT: addi sp, sp, 48
+; CHECK-NOZBA-VSETVLI-NEXT: ret
+;
+; CHECK-ZBA-VSETVLI-LABEL: lmul4_and_2_x2_1:
+; CHECK-ZBA-VSETVLI: # %bb.0:
+; CHECK-ZBA-VSETVLI-NEXT: addi sp, sp, -48
+; CHECK-ZBA-VSETVLI-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; CHECK-ZBA-VSETVLI-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; CHECK-ZBA-VSETVLI-NEXT: addi s0, sp, 48
+; CHECK-ZBA-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; CHECK-ZBA-VSETVLI-NEXT: slli a0, a0, 2
+; CHECK-ZBA-VSETVLI-NEXT: sh1add a0, a0, a0
+; CHECK-ZBA-VSETVLI-NEXT: sub sp, sp, a0
+; CHECK-ZBA-VSETVLI-NEXT: andi sp, sp, -32
+; CHECK-ZBA-VSETVLI-NEXT: addi sp, s0, -48
+; CHECK-ZBA-VSETVLI-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; CHECK-ZBA-VSETVLI-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; CHECK-ZBA-VSETVLI-NEXT: addi sp, sp, 48
+; CHECK-ZBA-VSETVLI-NEXT: ret
+;
+; CHECK-NOMUL-VSETVLI-LABEL: lmul4_and_2_x2_1:
+; CHECK-NOMUL-VSETVLI: # %bb.0:
+; CHECK-NOMUL-VSETVLI-NEXT: addi sp, sp, -48
+; CHECK-NOMUL-VSETVLI-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; CHECK-NOMUL-VSETVLI-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; CHECK-NOMUL-VSETVLI-NEXT: addi s0, sp, 48
+; CHECK-NOMUL-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; CHECK-NOMUL-VSETVLI-NEXT: slli a0, a0, 2
+; CHECK-NOMUL-VSETVLI-NEXT: mv a1, a0
+; CHECK-NOMUL-VSETVLI-NEXT: slli a0, a0, 1
+; CHECK-NOMUL-VSETVLI-NEXT: add a0, a0, a1
+; CHECK-NOMUL-VSETVLI-NEXT: sub sp, sp, a0
+; CHECK-NOMUL-VSETVLI-NEXT: andi sp, sp, -32
+; CHECK-NOMUL-VSETVLI-NEXT: addi sp, s0, -48
+; CHECK-NOMUL-VSETVLI-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; CHECK-NOMUL-VSETVLI-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; CHECK-NOMUL-VSETVLI-NEXT: addi sp, sp, 48
+; CHECK-NOMUL-VSETVLI-NEXT: ret
%v1 = alloca <vscale x 4 x i64>
%v3 = alloca <vscale x 4 x i64>
%v2 = alloca <vscale x 2 x i64>
@@ -486,6 +992,42 @@ define void @gpr_and_lmul1_and_2() nounwind {
; NOMUL-NEXT: add sp, sp, a0
; NOMUL-NEXT: addi sp, sp, 16
; NOMUL-NEXT: ret
+;
+; CHECK-NOZBA-VSETVLI-LABEL: gpr_and_lmul1_and_2:
+; CHECK-NOZBA-VSETVLI: # %bb.0:
+; CHECK-NOZBA-VSETVLI-NEXT: addi sp, sp, -16
+; CHECK-NOZBA-VSETVLI-NEXT: vsetvli a0, zero, e8, m4, ta, ma
+; CHECK-NOZBA-VSETVLI-NEXT: sub sp, sp, a0
+; CHECK-NOZBA-VSETVLI-NEXT: li a0, 3
+; CHECK-NOZBA-VSETVLI-NEXT: sd a0, 8(sp)
+; CHECK-NOZBA-VSETVLI-NEXT: vsetvli a0, zero, e8, m4, ta, ma
+; CHECK-NOZBA-VSETVLI-NEXT: add sp, sp, a0
+; CHECK-NOZBA-VSETVLI-NEXT: addi sp, sp, 16
+; CHECK-NOZBA-VSETVLI-NEXT: ret
+;
+; CHECK-ZBA-VSETVLI-LABEL: gpr_and_lmul1_and_2:
+; CHECK-ZBA-VSETVLI: # %bb.0:
+; CHECK-ZBA-VSETVLI-NEXT: addi sp, sp, -16
+; CHECK-ZBA-VSETVLI-NEXT: vsetvli a0, zero, e8, m4, ta, ma
+; CHECK-ZBA-VSETVLI-NEXT: sub sp, sp, a0
+; CHECK-ZBA-VSETVLI-NEXT: li a0, 3
+; CHECK-ZBA-VSETVLI-NEXT: sd a0, 8(sp)
+; CHECK-ZBA-VSETVLI-NEXT: vsetvli a0, zero, e8, m4, ta, ma
+; CHECK-ZBA-VSETVLI-NEXT: add sp, sp, a0
+; CHECK-ZBA-VSETVLI-NEXT: addi sp, sp, 16
+; CHECK-ZBA-VSETVLI-NEXT: ret
+;
+; CHECK-NOMUL-VSETVLI-LABEL: gpr_and_lmul1_and_2:
+; CHECK-NOMUL-VSETVLI: # %bb.0:
+; CHECK-NOMUL-VSETVLI-NEXT: addi sp, sp, -16
+; CHECK-NOMUL-VSETVLI-NEXT: vsetvli a0, zero, e8, m4, ta, ma
+; CHECK-NOMUL-VSETVLI-NEXT: sub sp, sp, a0
+; CHECK-NOMUL-VSETVLI-NEXT: li a0, 3
+; CHECK-NOMUL-VSETVLI-NEXT: sd a0, 8(sp)
+; CHECK-NOMUL-VSETVLI-NEXT: vsetvli a0, zero, e8, m4, ta, ma
+; CHECK-NOMUL-VSETVLI-NEXT: add sp, sp, a0
+; CHECK-NOMUL-VSETVLI-NEXT: addi sp, sp, 16
+; CHECK-NOMUL-VSETVLI-NEXT: ret
%x1 = alloca i64
%v1 = alloca <vscale x 1 x i64>
%v2 = alloca <vscale x 2 x i64>
@@ -511,6 +1053,57 @@ define void @gpr_and_lmul1_and_4() nounwind {
; CHECK-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; CHECK-NEXT: addi sp, sp, 48
; CHECK-NEXT: ret
+;
+; CHECK-NOZBA-VSETVLI-LABEL: gpr_and_lmul1_and_4:
+; CHECK-NOZBA-VSETVLI: # %bb.0:
+; CHECK-NOZBA-VSETVLI-NEXT: addi sp, sp, -48
+; CHECK-NOZBA-VSETVLI-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; CHECK-NOZBA-VSETVLI-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; CHECK-NOZBA-VSETVLI-NEXT: addi s0, sp, 48
+; CHECK-NOZBA-VSETVLI-NEXT: vsetvli a0, zero, e8, m8, ta, ma
+; CHECK-NOZBA-VSETVLI-NEXT: sub sp, sp, a0
+; CHECK-NOZBA-VSETVLI-NEXT: andi sp, sp, -32
+; CHECK-NOZBA-VSETVLI-NEXT: li a0, 3
+; CHECK-NOZBA-VSETVLI-NEXT: sd a0, 8(sp)
+; CHECK-NOZBA-VSETVLI-NEXT: addi sp, s0, -48
+; CHECK-NOZBA-VSETVLI-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; CHECK-NOZBA-VSETVLI-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; CHECK-NOZBA-VSETVLI-NEXT: addi sp, sp, 48
+; CHECK-NOZBA-VSETVLI-NEXT: ret
+;
+; CHECK-ZBA-VSETVLI-LABEL: gpr_and_lmul1_and_4:
+; CHECK-ZBA-VSETVLI: # %bb.0:
+; CHECK-ZBA-VSETVLI-NEXT: addi sp, sp, -48
+; CHECK-ZBA-VSETVLI-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; CHECK-ZBA-VSETVLI-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; CHECK-ZBA-VSETVLI-NEXT: addi s0, sp, 48
+; CHECK-ZBA-VSETVLI-NEXT: vsetvli a0, zero, e8, m8, ta, ma
+; CHECK-ZBA-VSETVLI-NEXT: sub sp, sp, a0
+; CHECK-ZBA-VSETVLI-NEXT: andi sp, sp, -32
+; CHECK-ZBA-VSETVLI-NEXT: li a0, 3
+; CHECK-ZBA-VSETVLI-NEXT: sd a0, 8(sp)
+; CHECK-ZBA-VSETVLI-NEXT: addi sp, s0, -48
+; CHECK-ZBA-VSETVLI-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; CHECK-ZBA-VSETVLI-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; CHECK-ZBA-VSETVLI-NEXT: addi sp, sp, 48
+; CHECK-ZBA-VSETVLI-NEXT: ret
+;
+; CHECK-NOMUL-VSETVLI-LABEL: gpr_and_lmul1_and_4:
+; CHECK-NOMUL-VSETVLI: # %bb.0:
+; CHECK-NOMUL-VSETVLI-NEXT: addi sp, sp, -48
+; CHECK-NOMUL-VSETVLI-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; CHECK-NOMUL-VSETVLI-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
+; CHECK-NOMUL-VSETVLI-NEXT: addi s0, sp, 48
+; CHECK-NOMUL-VSETVLI-NEXT: vsetvli a0, zero, e8, m8, ta, ma
+; CHECK-NOMUL-VSETVLI-NEXT: sub sp, sp, a0
+; CHECK-NOMUL-VSETVLI-NEXT: andi sp, sp, -32
+; CHECK-NOMUL-VSETVLI-NEXT: li a0, 3
+; CHECK-NOMUL-VSETVLI-NEXT: sd a0, 8(sp)
+; CHECK-NOMUL-VSETVLI-NEXT: addi sp, s0, -48
+; CHECK-NOMUL-VSETVLI-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; CHECK-NOMUL-VSETVLI-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
+; CHECK-NOMUL-VSETVLI-NEXT: addi sp, sp, 48
+; CHECK-NOMUL-VSETVLI-NEXT: ret
%x1 = alloca i64
%v1 = alloca <vscale x 1 x i64>
%v2 = alloca <vscale x 4 x i64>
@@ -534,6 +1127,54 @@ define void @lmul_1_2_4_8() nounwind {
; CHECK-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
; CHECK-NEXT: addi sp, sp, 80
; CHECK-NEXT: ret
+;
+; CHECK-NOZBA-VSETVLI-LABEL: lmul_1_2_4_8:
+; CHECK-NOZBA-VSETVLI: # %bb.0:
+; CHECK-NOZBA-VSETVLI-NEXT: addi sp, sp, -80
+; CHECK-NOZBA-VSETVLI-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
+; CHECK-NOZBA-VSETVLI-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
+; CHECK-NOZBA-VSETVLI-NEXT: addi s0, sp, 80
+; CHECK-NOZBA-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; CHECK-NOZBA-VSETVLI-NEXT: slli a0, a0, 4
+; CHECK-NOZBA-VSETVLI-NEXT: sub sp, sp, a0
+; CHECK-NOZBA-VSETVLI-NEXT: andi sp, sp, -64
+; CHECK-NOZBA-VSETVLI-NEXT: addi sp, s0, -80
+; CHECK-NOZBA-VSETVLI-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
+; CHECK-NOZBA-VSETVLI-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
+; CHECK-NOZBA-VSETVLI-NEXT: addi sp, sp, 80
+; CHECK-NOZBA-VSETVLI-NEXT: ret
+;
+; CHECK-ZBA-VSETVLI-LABEL: lmul_1_2_4_8:
+; CHECK-ZBA-VSETVLI: # %bb.0:
+; CHECK-ZBA-VSETVLI-NEXT: addi sp, sp, -80
+; CHECK-ZBA-VSETVLI-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
+; CHECK-ZBA-VSETVLI-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
+; CHECK-ZBA-VSETVLI-NEXT: addi s0, sp, 80
+; CHECK-ZBA-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; CHECK-ZBA-VSETVLI-NEXT: slli a0, a0, 4
+; CHECK-ZBA-VSETVLI-NEXT: sub sp, sp, a0
+; CHECK-ZBA-VSETVLI-NEXT: andi sp, sp, -64
+; CHECK-ZBA-VSETVLI-NEXT: addi sp, s0, -80
+; CHECK-ZBA-VSETVLI-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
+; CHECK-ZBA-VSETVLI-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
+; CHECK-ZBA-VSETVLI-NEXT: addi sp, sp, 80
+; CHECK-ZBA-VSETVLI-NEXT: ret
+;
+; CHECK-NOMUL-VSETVLI-LABEL: lmul_1_2_4_8:
+; CHECK-NOMUL-VSETVLI: # %bb.0:
+; CHECK-NOMUL-VSETVLI-NEXT: addi sp, sp, -80
+; CHECK-NOMUL-VSETVLI-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
+; CHECK-NOMUL-VSETVLI-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
+; CHECK-NOMUL-VSETVLI-NEXT: addi s0, sp, 80
+; CHECK-NOMUL-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; CHECK-NOMUL-VSETVLI-NEXT: slli a0, a0, 4
+; CHECK-NOMUL-VSETVLI-NEXT: sub sp, sp, a0
+; CHECK-NOMUL-VSETVLI-NEXT: andi sp, sp, -64
+; CHECK-NOMUL-VSETVLI-NEXT: addi sp, s0, -80
+; CHECK-NOMUL-VSETVLI-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
+; CHECK-NOMUL-VSETVLI-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
+; CHECK-NOMUL-VSETVLI-NEXT: addi sp, sp, 80
+; CHECK-NOMUL-VSETVLI-NEXT: ret
%v1 = alloca <vscale x 1 x i64>
%v2 = alloca <vscale x 2 x i64>
%v4 = alloca <vscale x 4 x i64>
@@ -557,6 +1198,54 @@ define void @lmul_1_2_4_8_x2_0() nounwind {
; CHECK-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
; CHECK-NEXT: addi sp, sp, 80
; CHECK-NEXT: ret
+;
+; CHECK-NOZBA-VSETVLI-LABEL: lmul_1_2_4_8_x2_0:
+; CHECK-NOZBA-VSETVLI: # %bb.0:
+; CHECK-NOZBA-VSETVLI-NEXT: addi sp, sp, -80
+; CHECK-NOZBA-VSETVLI-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
+; CHECK-NOZBA-VSETVLI-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
+; CHECK-NOZBA-VSETVLI-NEXT: addi s0, sp, 80
+; CHECK-NOZBA-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; CHECK-NOZBA-VSETVLI-NEXT: slli a0, a0, 5
+; CHECK-NOZBA-VSETVLI-NEXT: sub sp, sp, a0
+; CHECK-NOZBA-VSETVLI-NEXT: andi sp, sp, -64
+; CHECK-NOZBA-VSETVLI-NEXT: addi sp, s0, -80
+; CHECK-NOZBA-VSETVLI-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
+; CHECK-NOZBA-VSETVLI-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
+; CHECK-NOZBA-VSETVLI-NEXT: addi sp, sp, 80
+; CHECK-NOZBA-VSETVLI-NEXT: ret
+;
+; CHECK-ZBA-VSETVLI-LABEL: lmul_1_2_4_8_x2_0:
+; CHECK-ZBA-VSETVLI: # %bb.0:
+; CHECK-ZBA-VSETVLI-NEXT: addi sp, sp, -80
+; CHECK-ZBA-VSETVLI-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
+; CHECK-ZBA-VSETVLI-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
+; CHECK-ZBA-VSETVLI-NEXT: addi s0, sp, 80
+; CHECK-ZBA-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; CHECK-ZBA-VSETVLI-NEXT: slli a0, a0, 5
+; CHECK-ZBA-VSETVLI-NEXT: sub sp, sp, a0
+; CHECK-ZBA-VSETVLI-NEXT: andi sp, sp, -64
+; CHECK-ZBA-VSETVLI-NEXT: addi sp, s0, -80
+; CHECK-ZBA-VSETVLI-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
+; CHECK-ZBA-VSETVLI-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
+; CHECK-ZBA-VSETVLI-NEXT: addi sp, sp, 80
+; CHECK-ZBA-VSETVLI-NEXT: ret
+;
+; CHECK-NOMUL-VSETVLI-LABEL: lmul_1_2_4_8_x2_0:
+; CHECK-NOMUL-VSETVLI: # %bb.0:
+; CHECK-NOMUL-VSETVLI-NEXT: addi sp, sp, -80
+; CHECK-NOMUL-VSETVLI-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
+; CHECK-NOMUL-VSETVLI-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
+; CHECK-NOMUL-VSETVLI-NEXT: addi s0, sp, 80
+; CHECK-NOMUL-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; CHECK-NOMUL-VSETVLI-NEXT: slli a0, a0, 5
+; CHECK-NOMUL-VSETVLI-NEXT: sub sp, sp, a0
+; CHECK-NOMUL-VSETVLI-NEXT: andi sp, sp, -64
+; CHECK-NOMUL-VSETVLI-NEXT: addi sp, s0, -80
+; CHECK-NOMUL-VSETVLI-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
+; CHECK-NOMUL-VSETVLI-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
+; CHECK-NOMUL-VSETVLI-NEXT: addi sp, sp, 80
+; CHECK-NOMUL-VSETVLI-NEXT: ret
%v1 = alloca <vscale x 1 x i64>
%v2 = alloca <vscale x 1 x i64>
%v3 = alloca <vscale x 2 x i64>
@@ -584,6 +1273,54 @@ define void @lmul_1_2_4_8_x2_1() nounwind {
; CHECK-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
; CHECK-NEXT: addi sp, sp, 80
; CHECK-NEXT: ret
+;
+; CHECK-NOZBA-VSETVLI-LABEL: lmul_1_2_4_8_x2_1:
+; CHECK-NOZBA-VSETVLI: # %bb.0:
+; CHECK-NOZBA-VSETVLI-NEXT: addi sp, sp, -80
+; CHECK-NOZBA-VSETVLI-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
+; CHECK-NOZBA-VSETVLI-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
+; CHECK-NOZBA-VSETVLI-NEXT: addi s0, sp, 80
+; CHECK-NOZBA-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; CHECK-NOZBA-VSETVLI-NEXT: slli a0, a0, 5
+; CHECK-NOZBA-VSETVLI-NEXT: sub sp, sp, a0
+; CHECK-NOZBA-VSETVLI-NEXT: andi sp, sp, -64
+; CHECK-NOZBA-VSETVLI-NEXT: addi sp, s0, -80
+; CHECK-NOZBA-VSETVLI-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
+; CHECK-NOZBA-VSETVLI-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
+; CHECK-NOZBA-VSETVLI-NEXT: addi sp, sp, 80
+; CHECK-NOZBA-VSETVLI-NEXT: ret
+;
+; CHECK-ZBA-VSETVLI-LABEL: lmul_1_2_4_8_x2_1:
+; CHECK-ZBA-VSETVLI: # %bb.0:
+; CHECK-ZBA-VSETVLI-NEXT: addi sp, sp, -80
+; CHECK-ZBA-VSETVLI-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
+; CHECK-ZBA-VSETVLI-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
+; CHECK-ZBA-VSETVLI-NEXT: addi s0, sp, 80
+; CHECK-ZBA-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; CHECK-ZBA-VSETVLI-NEXT: slli a0, a0, 5
+; CHECK-ZBA-VSETVLI-NEXT: sub sp, sp, a0
+; CHECK-ZBA-VSETVLI-NEXT: andi sp, sp, -64
+; CHECK-ZBA-VSETVLI-NEXT: addi sp, s0, -80
+; CHECK-ZBA-VSETVLI-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
+; CHECK-ZBA-VSETVLI-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
+; CHECK-ZBA-VSETVLI-NEXT: addi sp, sp, 80
+; CHECK-ZBA-VSETVLI-NEXT: ret
+;
+; CHECK-NOMUL-VSETVLI-LABEL: lmul_1_2_4_8_x2_1:
+; CHECK-NOMUL-VSETVLI: # %bb.0:
+; CHECK-NOMUL-VSETVLI-NEXT: addi sp, sp, -80
+; CHECK-NOMUL-VSETVLI-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
+; CHECK-NOMUL-VSETVLI-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
+; CHECK-NOMUL-VSETVLI-NEXT: addi s0, sp, 80
+; CHECK-NOMUL-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; CHECK-NOMUL-VSETVLI-NEXT: slli a0, a0, 5
+; CHECK-NOMUL-VSETVLI-NEXT: sub sp, sp, a0
+; CHECK-NOMUL-VSETVLI-NEXT: andi sp, sp, -64
+; CHECK-NOMUL-VSETVLI-NEXT: addi sp, s0, -80
+; CHECK-NOMUL-VSETVLI-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
+; CHECK-NOMUL-VSETVLI-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
+; CHECK-NOMUL-VSETVLI-NEXT: addi sp, sp, 80
+; CHECK-NOMUL-VSETVLI-NEXT: ret
%v8 = alloca <vscale x 8 x i64>
%v7 = alloca <vscale x 8 x i64>
%v6 = alloca <vscale x 4 x i64>
@@ -624,6 +1361,30 @@ define void @masks() nounwind {
; NOMUL-NEXT: slli a0, a0, 2
; NOMUL-NEXT: add sp, sp, a0
; NOMUL-NEXT: ret
+;
+; CHECK-NOZBA-VSETVLI-LABEL: masks:
+; CHECK-NOZBA-VSETVLI: # %bb.0:
+; CHECK-NOZBA-VSETVLI-NEXT: vsetvli a0, zero, e8, m4, ta, ma
+; CHECK-NOZBA-VSETVLI-NEXT: sub sp, sp, a0
+; CHECK-NOZBA-VSETVLI-NEXT: vsetvli a0, zero, e8, m4, ta, ma
+; CHECK-NOZBA-VSETVLI-NEXT: add sp, sp, a0
+; CHECK-NOZBA-VSETVLI-NEXT: ret
+;
+; CHECK-ZBA-VSETVLI-LABEL: masks:
+; CHECK-ZBA-VSETVLI: # %bb.0:
+; CHECK-ZBA-VSETVLI-NEXT: vsetvli a0, zero, e8, m4, ta, ma
+; CHECK-ZBA-VSETVLI-NEXT: sub sp, sp, a0
+; CHECK-ZBA-VSETVLI-NEXT: vsetvli a0, zero, e8, m4, ta, ma
+; CHECK-ZBA-VSETVLI-NEXT: add sp, sp, a0
+; CHECK-ZBA-VSETVLI-NEXT: ret
+;
+; CHECK-NOMUL-VSETVLI-LABEL: masks:
+; CHECK-NOMUL-VSETVLI: # %bb.0:
+; CHECK-NOMUL-VSETVLI-NEXT: vsetvli a0, zero, e8, m4, ta, ma
+; CHECK-NOMUL-VSETVLI-NEXT: sub sp, sp, a0
+; CHECK-NOMUL-VSETVLI-NEXT: vsetvli a0, zero, e8, m4, ta, ma
+; CHECK-NOMUL-VSETVLI-NEXT: add sp, sp, a0
+; CHECK-NOMUL-VSETVLI-NEXT: ret
%v1 = alloca <vscale x 1 x i1>
%v2 = alloca <vscale x 2 x i1>
%v4 = alloca <vscale x 4 x i1>
@@ -684,6 +1445,59 @@ define void @lmul_8_x5() nounwind {
; NOMUL-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
; NOMUL-NEXT: addi sp, sp, 80
; NOMUL-NEXT: ret
+;
+; CHECK-NOZBA-VSETVLI-LABEL: lmul_8_x5:
+; CHECK-NOZBA-VSETVLI: # %bb.0:
+; CHECK-NOZBA-VSETVLI-NEXT: addi sp, sp, -80
+; CHECK-NOZBA-VSETVLI-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
+; CHECK-NOZBA-VSETVLI-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
+; CHECK-NOZBA-VSETVLI-NEXT: addi s0, sp, 80
+; CHECK-NOZBA-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; CHECK-NOZBA-VSETVLI-NEXT: li a1, 40
+; CHECK-NOZBA-VSETVLI-NEXT: mul a0, a0, a1
+; CHECK-NOZBA-VSETVLI-NEXT: sub sp, sp, a0
+; CHECK-NOZBA-VSETVLI-NEXT: andi sp, sp, -64
+; CHECK-NOZBA-VSETVLI-NEXT: addi sp, s0, -80
+; CHECK-NOZBA-VSETVLI-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
+; CHECK-NOZBA-VSETVLI-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
+; CHECK-NOZBA-VSETVLI-NEXT: addi sp, sp, 80
+; CHECK-NOZBA-VSETVLI-NEXT: ret
+;
+; CHECK-ZBA-VSETVLI-LABEL: lmul_8_x5:
+; CHECK-ZBA-VSETVLI: # %bb.0:
+; CHECK-ZBA-VSETVLI-NEXT: addi sp, sp, -80
+; CHECK-ZBA-VSETVLI-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
+; CHECK-ZBA-VSETVLI-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
+; CHECK-ZBA-VSETVLI-NEXT: addi s0, sp, 80
+; CHECK-ZBA-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; CHECK-ZBA-VSETVLI-NEXT: slli a0, a0, 3
+; CHECK-ZBA-VSETVLI-NEXT: sh2add a0, a0, a0
+; CHECK-ZBA-VSETVLI-NEXT: sub sp, sp, a0
+; CHECK-ZBA-VSETVLI-NEXT: andi sp, sp, -64
+; CHECK-ZBA-VSETVLI-NEXT: addi sp, s0, -80
+; CHECK-ZBA-VSETVLI-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
+; CHECK-ZBA-VSETVLI-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
+; CHECK-ZBA-VSETVLI-NEXT: addi sp, sp, 80
+; CHECK-ZBA-VSETVLI-NEXT: ret
+;
+; CHECK-NOMUL-VSETVLI-LABEL: lmul_8_x5:
+; CHECK-NOMUL-VSETVLI: # %bb.0:
+; CHECK-NOMUL-VSETVLI-NEXT: addi sp, sp, -80
+; CHECK-NOMUL-VSETVLI-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
+; CHECK-NOMUL-VSETVLI-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
+; CHECK-NOMUL-VSETVLI-NEXT: addi s0, sp, 80
+; CHECK-NOMUL-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; CHECK-NOMUL-VSETVLI-NEXT: slli a0, a0, 3
+; CHECK-NOMUL-VSETVLI-NEXT: mv a1, a0
+; CHECK-NOMUL-VSETVLI-NEXT: slli a0, a0, 2
+; CHECK-NOMUL-VSETVLI-NEXT: add a0, a0, a1
+; CHECK-NOMUL-VSETVLI-NEXT: sub sp, sp, a0
+; CHECK-NOMUL-VSETVLI-NEXT: andi sp, sp, -64
+; CHECK-NOMUL-VSETVLI-NEXT: addi sp, s0, -80
+; CHECK-NOMUL-VSETVLI-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
+; CHECK-NOMUL-VSETVLI-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
+; CHECK-NOMUL-VSETVLI-NEXT: addi sp, sp, 80
+; CHECK-NOMUL-VSETVLI-NEXT: ret
%v1 = alloca <vscale x 8 x i64>
%v2 = alloca <vscale x 8 x i64>
%v3 = alloca <vscale x 8 x i64>
@@ -745,6 +1559,59 @@ define void @lmul_8_x9() nounwind {
; NOMUL-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
; NOMUL-NEXT: addi sp, sp, 80
; NOMUL-NEXT: ret
+;
+; CHECK-NOZBA-VSETVLI-LABEL: lmul_8_x9:
+; CHECK-NOZBA-VSETVLI: # %bb.0:
+; CHECK-NOZBA-VSETVLI-NEXT: addi sp, sp, -80
+; CHECK-NOZBA-VSETVLI-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
+; CHECK-NOZBA-VSETVLI-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
+; CHECK-NOZBA-VSETVLI-NEXT: addi s0, sp, 80
+; CHECK-NOZBA-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; CHECK-NOZBA-VSETVLI-NEXT: li a1, 72
+; CHECK-NOZBA-VSETVLI-NEXT: mul a0, a0, a1
+; CHECK-NOZBA-VSETVLI-NEXT: sub sp, sp, a0
+; CHECK-NOZBA-VSETVLI-NEXT: andi sp, sp, -64
+; CHECK-NOZBA-VSETVLI-NEXT: addi sp, s0, -80
+; CHECK-NOZBA-VSETVLI-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
+; CHECK-NOZBA-VSETVLI-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
+; CHECK-NOZBA-VSETVLI-NEXT: addi sp, sp, 80
+; CHECK-NOZBA-VSETVLI-NEXT: ret
+;
+; CHECK-ZBA-VSETVLI-LABEL: lmul_8_x9:
+; CHECK-ZBA-VSETVLI: # %bb.0:
+; CHECK-ZBA-VSETVLI-NEXT: addi sp, sp, -80
+; CHECK-ZBA-VSETVLI-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
+; CHECK-ZBA-VSETVLI-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
+; CHECK-ZBA-VSETVLI-NEXT: addi s0, sp, 80
+; CHECK-ZBA-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; CHECK-ZBA-VSETVLI-NEXT: slli a0, a0, 3
+; CHECK-ZBA-VSETVLI-NEXT: sh3add a0, a0, a0
+; CHECK-ZBA-VSETVLI-NEXT: sub sp, sp, a0
+; CHECK-ZBA-VSETVLI-NEXT: andi sp, sp, -64
+; CHECK-ZBA-VSETVLI-NEXT: addi sp, s0, -80
+; CHECK-ZBA-VSETVLI-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
+; CHECK-ZBA-VSETVLI-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
+; CHECK-ZBA-VSETVLI-NEXT: addi sp, sp, 80
+; CHECK-ZBA-VSETVLI-NEXT: ret
+;
+; CHECK-NOMUL-VSETVLI-LABEL: lmul_8_x9:
+; CHECK-NOMUL-VSETVLI: # %bb.0:
+; CHECK-NOMUL-VSETVLI-NEXT: addi sp, sp, -80
+; CHECK-NOMUL-VSETVLI-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
+; CHECK-NOMUL-VSETVLI-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
+; CHECK-NOMUL-VSETVLI-NEXT: addi s0, sp, 80
+; CHECK-NOMUL-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; CHECK-NOMUL-VSETVLI-NEXT: slli a0, a0, 3
+; CHECK-NOMUL-VSETVLI-NEXT: mv a1, a0
+; CHECK-NOMUL-VSETVLI-NEXT: slli a0, a0, 3
+; CHECK-NOMUL-VSETVLI-NEXT: add a0, a0, a1
+; CHECK-NOMUL-VSETVLI-NEXT: sub sp, sp, a0
+; CHECK-NOMUL-VSETVLI-NEXT: andi sp, sp, -64
+; CHECK-NOMUL-VSETVLI-NEXT: addi sp, s0, -80
+; CHECK-NOMUL-VSETVLI-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
+; CHECK-NOMUL-VSETVLI-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
+; CHECK-NOMUL-VSETVLI-NEXT: addi sp, sp, 80
+; CHECK-NOMUL-VSETVLI-NEXT: ret
%v1 = alloca <vscale x 8 x i64>
%v2 = alloca <vscale x 8 x i64>
%v3 = alloca <vscale x 8 x i64>
@@ -851,6 +1718,100 @@ define void @lmul_16_align() nounwind {
; NOMUL-NEXT: ld s0, 128(sp) # 8-byte Folded Reload
; NOMUL-NEXT: addi sp, sp, 144
; NOMUL-NEXT: ret
+;
+; CHECK-NOZBA-VSETVLI-LABEL: lmul_16_align:
+; CHECK-NOZBA-VSETVLI: # %bb.0:
+; CHECK-NOZBA-VSETVLI-NEXT: addi sp, sp, -144
+; CHECK-NOZBA-VSETVLI-NEXT: sd ra, 136(sp) # 8-byte Folded Spill
+; CHECK-NOZBA-VSETVLI-NEXT: sd s0, 128(sp) # 8-byte Folded Spill
+; CHECK-NOZBA-VSETVLI-NEXT: addi s0, sp, 144
+; CHECK-NOZBA-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; CHECK-NOZBA-VSETVLI-NEXT: li a1, 24
+; CHECK-NOZBA-VSETVLI-NEXT: mul a0, a0, a1
+; CHECK-NOZBA-VSETVLI-NEXT: sub sp, sp, a0
+; CHECK-NOZBA-VSETVLI-NEXT: andi sp, sp, -128
+; CHECK-NOZBA-VSETVLI-NEXT: vsetvli a0, zero, e64, m8, ta, ma
+; CHECK-NOZBA-VSETVLI-NEXT: vmv.v.i v8, 0
+; CHECK-NOZBA-VSETVLI-NEXT: csrr a0, vlenb
+; CHECK-NOZBA-VSETVLI-NEXT: add a0, sp, a0
+; CHECK-NOZBA-VSETVLI-NEXT: addi a0, a0, 128
+; CHECK-NOZBA-VSETVLI-NEXT: csrr a1, vlenb
+; CHECK-NOZBA-VSETVLI-NEXT: vs8r.v v8, (a0)
+; CHECK-NOZBA-VSETVLI-NEXT: slli a1, a1, 3
+; CHECK-NOZBA-VSETVLI-NEXT: add a0, a0, a1
+; CHECK-NOZBA-VSETVLI-NEXT: vs8r.v v8, (a0)
+; CHECK-NOZBA-VSETVLI-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-NOZBA-VSETVLI-NEXT: vmv.v.i v8, 0
+; CHECK-NOZBA-VSETVLI-NEXT: addi a0, sp, 128
+; CHECK-NOZBA-VSETVLI-NEXT: vs1r.v v8, (a0)
+; CHECK-NOZBA-VSETVLI-NEXT: addi sp, s0, -144
+; CHECK-NOZBA-VSETVLI-NEXT: ld ra, 136(sp) # 8-byte Folded Reload
+; CHECK-NOZBA-VSETVLI-NEXT: ld s0, 128(sp) # 8-byte Folded Reload
+; CHECK-NOZBA-VSETVLI-NEXT: addi sp, sp, 144
+; CHECK-NOZBA-VSETVLI-NEXT: ret
+;
+; CHECK-ZBA-VSETVLI-LABEL: lmul_16_align:
+; CHECK-ZBA-VSETVLI: # %bb.0:
+; CHECK-ZBA-VSETVLI-NEXT: addi sp, sp, -144
+; CHECK-ZBA-VSETVLI-NEXT: sd ra, 136(sp) # 8-byte Folded Spill
+; CHECK-ZBA-VSETVLI-NEXT: sd s0, 128(sp) # 8-byte Folded Spill
+; CHECK-ZBA-VSETVLI-NEXT: addi s0, sp, 144
+; CHECK-ZBA-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; CHECK-ZBA-VSETVLI-NEXT: slli a0, a0, 3
+; CHECK-ZBA-VSETVLI-NEXT: sh1add a0, a0, a0
+; CHECK-ZBA-VSETVLI-NEXT: sub sp, sp, a0
+; CHECK-ZBA-VSETVLI-NEXT: andi sp, sp, -128
+; CHECK-ZBA-VSETVLI-NEXT: vsetvli a0, zero, e64, m8, ta, ma
+; CHECK-ZBA-VSETVLI-NEXT: vmv.v.i v8, 0
+; CHECK-ZBA-VSETVLI-NEXT: csrr a0, vlenb
+; CHECK-ZBA-VSETVLI-NEXT: add a0, sp, a0
+; CHECK-ZBA-VSETVLI-NEXT: addi a0, a0, 128
+; CHECK-ZBA-VSETVLI-NEXT: csrr a1, vlenb
+; CHECK-ZBA-VSETVLI-NEXT: vs8r.v v8, (a0)
+; CHECK-ZBA-VSETVLI-NEXT: sh3add a0, a1, a0
+; CHECK-ZBA-VSETVLI-NEXT: vs8r.v v8, (a0)
+; CHECK-ZBA-VSETVLI-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-ZBA-VSETVLI-NEXT: vmv.v.i v8, 0
+; CHECK-ZBA-VSETVLI-NEXT: addi a0, sp, 128
+; CHECK-ZBA-VSETVLI-NEXT: vs1r.v v8, (a0)
+; CHECK-ZBA-VSETVLI-NEXT: addi sp, s0, -144
+; CHECK-ZBA-VSETVLI-NEXT: ld ra, 136(sp) # 8-byte Folded Reload
+; CHECK-ZBA-VSETVLI-NEXT: ld s0, 128(sp) # 8-byte Folded Reload
+; CHECK-ZBA-VSETVLI-NEXT: addi sp, sp, 144
+; CHECK-ZBA-VSETVLI-NEXT: ret
+;
+; CHECK-NOMUL-VSETVLI-LABEL: lmul_16_align:
+; CHECK-NOMUL-VSETVLI: # %bb.0:
+; CHECK-NOMUL-VSETVLI-NEXT: addi sp, sp, -144
+; CHECK-NOMUL-VSETVLI-NEXT: sd ra, 136(sp) # 8-byte Folded Spill
+; CHECK-NOMUL-VSETVLI-NEXT: sd s0, 128(sp) # 8-byte Folded Spill
+; CHECK-NOMUL-VSETVLI-NEXT: addi s0, sp, 144
+; CHECK-NOMUL-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; CHECK-NOMUL-VSETVLI-NEXT: slli a0, a0, 3
+; CHECK-NOMUL-VSETVLI-NEXT: mv a1, a0
+; CHECK-NOMUL-VSETVLI-NEXT: slli a0, a0, 1
+; CHECK-NOMUL-VSETVLI-NEXT: add a0, a0, a1
+; CHECK-NOMUL-VSETVLI-NEXT: sub sp, sp, a0
+; CHECK-NOMUL-VSETVLI-NEXT: andi sp, sp, -128
+; CHECK-NOMUL-VSETVLI-NEXT: vsetvli a0, zero, e64, m8, ta, ma
+; CHECK-NOMUL-VSETVLI-NEXT: vmv.v.i v8, 0
+; CHECK-NOMUL-VSETVLI-NEXT: csrr a0, vlenb
+; CHECK-NOMUL-VSETVLI-NEXT: add a0, sp, a0
+; CHECK-NOMUL-VSETVLI-NEXT: addi a0, a0, 128
+; CHECK-NOMUL-VSETVLI-NEXT: csrr a1, vlenb
+; CHECK-NOMUL-VSETVLI-NEXT: vs8r.v v8, (a0)
+; CHECK-NOMUL-VSETVLI-NEXT: slli a1, a1, 3
+; CHECK-NOMUL-VSETVLI-NEXT: add a0, a0, a1
+; CHECK-NOMUL-VSETVLI-NEXT: vs8r.v v8, (a0)
+; CHECK-NOMUL-VSETVLI-NEXT: vsetvli a0, zero, e64, m1, ta, ma
+; CHECK-NOMUL-VSETVLI-NEXT: vmv.v.i v8, 0
+; CHECK-NOMUL-VSETVLI-NEXT: addi a0, sp, 128
+; CHECK-NOMUL-VSETVLI-NEXT: vs1r.v v8, (a0)
+; CHECK-NOMUL-VSETVLI-NEXT: addi sp, s0, -144
+; CHECK-NOMUL-VSETVLI-NEXT: ld ra, 136(sp) # 8-byte Folded Reload
+; CHECK-NOMUL-VSETVLI-NEXT: ld s0, 128(sp) # 8-byte Folded Reload
+; CHECK-NOMUL-VSETVLI-NEXT: addi sp, sp, 144
+; CHECK-NOMUL-VSETVLI-NEXT: ret
%v1 = alloca <vscale x 16 x i64>
%v2 = alloca <vscale x 1 x i64>
store <vscale x 16 x i64> zeroinitializer, ptr %v1
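A note on the `lmul_16_align` prologues above: all three VSETVLI variants materialize 24 * VLENB (three nxv8i64 allocas, each eight registers wide), and differ only in how they scale the `vsetvli` result. With `vsetvli a0, zero, e8, m1, ta, ma`, a0 receives VLMAX for SEW=8/LMUL=1, which equals VLENB, so the scaling boils down to (a sketch of the arithmetic, not quoted verbatim from the tests):

  # NOZBA: plain multiply (needs M)
  li     a1, 24        # scale factor
  mul    a0, a0, a1    # a0 = 24 * vlenb
  # ZBA: shift-add from the Zba extension
  slli   a0, a0, 3     # a0 = 8 * vlenb
  sh1add a0, a0, a0    # a0 = (8*vlenb << 1) + 8*vlenb = 24 * vlenb
  # NOMUL: shifts and adds only
  slli   a0, a0, 3     # a0 = 8 * vlenb
  mv     a1, a0
  slli   a0, a0, 1     # a0 = 16 * vlenb
  add    a0, a0, a1    # a0 = 24 * vlenb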
diff --git a/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector-csr.ll b/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector-csr.ll
index 1205ff17d113e..c9e11de38007a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector-csr.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector-csr.ll
@@ -5,6 +5,12 @@
; RUN: | FileCheck --check-prefix=SPILL-O2 %s
; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zcmp -O2 < %s \
; RUN: | FileCheck --check-prefix=SPILL-O2-ZCMP %s
+; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+prefer-vsetvli-over-read-vlenb -O0 < %s \
+; RUN: | FileCheck --check-prefix=SPILL-O0-VSETVLI %s
+; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+prefer-vsetvli-over-read-vlenb -O2 < %s \
+; RUN: | FileCheck --check-prefix=SPILL-O2-VSETVLI %s
+; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zcmp,+prefer-vsetvli-over-read-vlenb -O2 < %s \
+; RUN: | FileCheck --check-prefix=SPILL-O2-ZCMP-VSETVLI %s
@.str = private unnamed_addr constant [6 x i8] c"hello\00", align 1
@@ -131,6 +137,123 @@ define <vscale x 1 x double> @foo(<vscale x 1 x double> %a, <vscale x 1 x double
; SPILL-O2-ZCMP-NEXT: add sp, sp, a0
; SPILL-O2-ZCMP-NEXT: .cfi_def_cfa sp, 16
; SPILL-O2-ZCMP-NEXT: cm.popret {ra, s0}, 16
+;
+; SPILL-O0-VSETVLI-LABEL: foo:
+; SPILL-O0-VSETVLI: # %bb.0:
+; SPILL-O0-VSETVLI-NEXT: addi sp, sp, -32
+; SPILL-O0-VSETVLI-NEXT: .cfi_def_cfa_offset 32
+; SPILL-O0-VSETVLI-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; SPILL-O0-VSETVLI-NEXT: .cfi_offset ra, -4
+; SPILL-O0-VSETVLI-NEXT: vsetvli a1, zero, e8, m2, ta, ma
+; SPILL-O0-VSETVLI-NEXT: sub sp, sp, a1
+; SPILL-O0-VSETVLI-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x20, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 + 2 * vlenb
+; SPILL-O0-VSETVLI-NEXT: sw a0, 8(sp) # 4-byte Folded Spill
+; SPILL-O0-VSETVLI-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; SPILL-O0-VSETVLI-NEXT: vmv1r.v v10, v9
+; SPILL-O0-VSETVLI-NEXT: vmv1r.v v9, v8
+; SPILL-O0-VSETVLI-NEXT: csrr a1, vlenb
+; SPILL-O0-VSETVLI-NEXT: add a1, sp, a1
+; SPILL-O0-VSETVLI-NEXT: addi a1, a1, 16
+; SPILL-O0-VSETVLI-NEXT: vs1r.v v9, (a1) # Unknown-size Folded Spill
+; SPILL-O0-VSETVLI-NEXT: # implicit-def: $v8
+; SPILL-O0-VSETVLI-NEXT: vsetvli zero, a0, e64, m1, tu, ma
+; SPILL-O0-VSETVLI-NEXT: vfadd.vv v8, v9, v10
+; SPILL-O0-VSETVLI-NEXT: addi a0, sp, 16
+; SPILL-O0-VSETVLI-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
+; SPILL-O0-VSETVLI-NEXT: lui a0, %hi(.L.str)
+; SPILL-O0-VSETVLI-NEXT: addi a0, a0, %lo(.L.str)
+; SPILL-O0-VSETVLI-NEXT: call puts
+; SPILL-O0-VSETVLI-NEXT: addi a1, sp, 16
+; SPILL-O0-VSETVLI-NEXT: vl1r.v v10, (a1) # Unknown-size Folded Reload
+; SPILL-O0-VSETVLI-NEXT: csrr a1, vlenb
+; SPILL-O0-VSETVLI-NEXT: add a1, sp, a1
+; SPILL-O0-VSETVLI-NEXT: addi a1, a1, 16
+; SPILL-O0-VSETVLI-NEXT: vl1r.v v9, (a1) # Unknown-size Folded Reload
+; SPILL-O0-VSETVLI-NEXT: # kill: def $x11 killed $x10
+; SPILL-O0-VSETVLI-NEXT: lw a0, 8(sp) # 4-byte Folded Reload
+; SPILL-O0-VSETVLI-NEXT: # implicit-def: $v8
+; SPILL-O0-VSETVLI-NEXT: vsetvli zero, a0, e64, m1, tu, ma
+; SPILL-O0-VSETVLI-NEXT: vfadd.vv v8, v9, v10
+; SPILL-O0-VSETVLI-NEXT: vsetvli a0, zero, e8, m2, ta, ma
+; SPILL-O0-VSETVLI-NEXT: add sp, sp, a0
+; SPILL-O0-VSETVLI-NEXT: .cfi_def_cfa sp, 32
+; SPILL-O0-VSETVLI-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; SPILL-O0-VSETVLI-NEXT: .cfi_restore ra
+; SPILL-O0-VSETVLI-NEXT: addi sp, sp, 32
+; SPILL-O0-VSETVLI-NEXT: .cfi_def_cfa_offset 0
+; SPILL-O0-VSETVLI-NEXT: ret
+;
+; SPILL-O2-VSETVLI-LABEL: foo:
+; SPILL-O2-VSETVLI: # %bb.0:
+; SPILL-O2-VSETVLI-NEXT: addi sp, sp, -32
+; SPILL-O2-VSETVLI-NEXT: .cfi_def_cfa_offset 32
+; SPILL-O2-VSETVLI-NEXT: sw ra, 28(sp) # 4-byte Folded Spill
+; SPILL-O2-VSETVLI-NEXT: sw s0, 24(sp) # 4-byte Folded Spill
+; SPILL-O2-VSETVLI-NEXT: .cfi_offset ra, -4
+; SPILL-O2-VSETVLI-NEXT: .cfi_offset s0, -8
+; SPILL-O2-VSETVLI-NEXT: vsetvli a1, zero, e8, m2, ta, ma
+; SPILL-O2-VSETVLI-NEXT: sub sp, sp, a1
+; SPILL-O2-VSETVLI-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x20, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 + 2 * vlenb
+; SPILL-O2-VSETVLI-NEXT: mv s0, a0
+; SPILL-O2-VSETVLI-NEXT: addi a1, sp, 16
+; SPILL-O2-VSETVLI-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill
+; SPILL-O2-VSETVLI-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; SPILL-O2-VSETVLI-NEXT: vfadd.vv v9, v8, v9
+; SPILL-O2-VSETVLI-NEXT: csrr a0, vlenb
+; SPILL-O2-VSETVLI-NEXT: add a0, sp, a0
+; SPILL-O2-VSETVLI-NEXT: addi a0, a0, 16
+; SPILL-O2-VSETVLI-NEXT: vs1r.v v9, (a0) # Unknown-size Folded Spill
+; SPILL-O2-VSETVLI-NEXT: lui a0, %hi(.L.str)
+; SPILL-O2-VSETVLI-NEXT: addi a0, a0, %lo(.L.str)
+; SPILL-O2-VSETVLI-NEXT: call puts
+; SPILL-O2-VSETVLI-NEXT: csrr a0, vlenb
+; SPILL-O2-VSETVLI-NEXT: add a0, sp, a0
+; SPILL-O2-VSETVLI-NEXT: addi a0, a0, 16
+; SPILL-O2-VSETVLI-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
+; SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16
+; SPILL-O2-VSETVLI-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload
+; SPILL-O2-VSETVLI-NEXT: vsetvli zero, s0, e64, m1, ta, ma
+; SPILL-O2-VSETVLI-NEXT: vfadd.vv v8, v9, v8
+; SPILL-O2-VSETVLI-NEXT: vsetvli a0, zero, e8, m2, ta, ma
+; SPILL-O2-VSETVLI-NEXT: add sp, sp, a0
+; SPILL-O2-VSETVLI-NEXT: .cfi_def_cfa sp, 32
+; SPILL-O2-VSETVLI-NEXT: lw ra, 28(sp) # 4-byte Folded Reload
+; SPILL-O2-VSETVLI-NEXT: lw s0, 24(sp) # 4-byte Folded Reload
+; SPILL-O2-VSETVLI-NEXT: .cfi_restore ra
+; SPILL-O2-VSETVLI-NEXT: .cfi_restore s0
+; SPILL-O2-VSETVLI-NEXT: addi sp, sp, 32
+; SPILL-O2-VSETVLI-NEXT: .cfi_def_cfa_offset 0
+; SPILL-O2-VSETVLI-NEXT: ret
+;
+; SPILL-O2-ZCMP-VSETVLI-LABEL: foo:
+; SPILL-O2-ZCMP-VSETVLI: # %bb.0:
+; SPILL-O2-ZCMP-VSETVLI-NEXT: cm.push {ra, s0}, -16
+; SPILL-O2-ZCMP-VSETVLI-NEXT: .cfi_def_cfa_offset 16
+; SPILL-O2-ZCMP-VSETVLI-NEXT: .cfi_offset ra, -8
+; SPILL-O2-ZCMP-VSETVLI-NEXT: .cfi_offset s0, -4
+; SPILL-O2-ZCMP-VSETVLI-NEXT: vsetvli a1, zero, e8, m2, ta, ma
+; SPILL-O2-ZCMP-VSETVLI-NEXT: sub sp, sp, a1
+; SPILL-O2-ZCMP-VSETVLI-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
+; SPILL-O2-ZCMP-VSETVLI-NEXT: mv s0, a0
+; SPILL-O2-ZCMP-VSETVLI-NEXT: vs1r.v v8, (sp) # Unknown-size Folded Spill
+; SPILL-O2-ZCMP-VSETVLI-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; SPILL-O2-ZCMP-VSETVLI-NEXT: vfadd.vv v9, v8, v9
+; SPILL-O2-ZCMP-VSETVLI-NEXT: csrr a0, vlenb
+; SPILL-O2-ZCMP-VSETVLI-NEXT: add a0, a0, sp
+; SPILL-O2-ZCMP-VSETVLI-NEXT: vs1r.v v9, (a0) # Unknown-size Folded Spill
+; SPILL-O2-ZCMP-VSETVLI-NEXT: lui a0, %hi(.L.str)
+; SPILL-O2-ZCMP-VSETVLI-NEXT: addi a0, a0, %lo(.L.str)
+; SPILL-O2-ZCMP-VSETVLI-NEXT: call puts
+; SPILL-O2-ZCMP-VSETVLI-NEXT: csrr a0, vlenb
+; SPILL-O2-ZCMP-VSETVLI-NEXT: add a0, a0, sp
+; SPILL-O2-ZCMP-VSETVLI-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
+; SPILL-O2-ZCMP-VSETVLI-NEXT: vl1r.v v9, (sp) # Unknown-size Folded Reload
+; SPILL-O2-ZCMP-VSETVLI-NEXT: vsetvli zero, s0, e64, m1, ta, ma
+; SPILL-O2-ZCMP-VSETVLI-NEXT: vfadd.vv v8, v9, v8
+; SPILL-O2-ZCMP-VSETVLI-NEXT: vsetvli a0, zero, e8, m2, ta, ma
+; SPILL-O2-ZCMP-VSETVLI-NEXT: add sp, sp, a0
+; SPILL-O2-ZCMP-VSETVLI-NEXT: .cfi_def_cfa sp, 16
+; SPILL-O2-ZCMP-VSETVLI-NEXT: cm.popret {ra, s0}, 16
{
%x = call <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %a, <vscale x 1 x double> %b, i32 7, i32 %gvl)
%call = call signext i32 @puts(ptr @.str)
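The frame setup in the VSETVLI prefixes above relies on the architectural rd-from-VLMAX behavior of `vsetvli`: with rs1 = x0 and rd != x0, vl is set to VLMAX and written to rd, so `e8` with LMUL = mN yields N * VLENB directly. A minimal sketch of the two ways `foo` can obtain its 2 * VLENB spill area (the csrr form is roughly what the non-VSETVLI prefixes produce):

  # baseline: read the CSR, then scale
  csrr    a1, vlenb                    # a1 = vlenb
  slli    a1, a1, 1                    # a1 = 2 * vlenb
  # with +prefer-vsetvli-over-read-vlenb: one instruction
  vsetvli a1, zero, e8, m2, ta, ma     # a1 = VLMAX(e8, m2) = 2 * vlenb

This matches the `sp + 32 + 2 * vlenb` CFA expression in the .cfi_escape comments above.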
diff --git a/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector.ll b/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector.ll
index f0cd067fd0448..80b6c45b9c1ac 100644
--- a/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector.ll
@@ -3,6 +3,10 @@
; RUN: | FileCheck --check-prefix=SPILL-O0 %s
; RUN: llc -mtriple=riscv32 -mattr=+v -O2 < %s \
; RUN: | FileCheck --check-prefix=SPILL-O2 %s
+; RUN: llc -mtriple=riscv32 -mattr=+v,+prefer-vsetvli-over-read-vlenb -O0 < %s \
+; RUN: | FileCheck --check-prefix=SPILL-O0-VSETVLI %s
+; RUN: llc -mtriple=riscv32 -mattr=+v,+prefer-vsetvli-over-read-vlenb -O2 < %s \
+; RUN: | FileCheck --check-prefix=SPILL-O2-VSETVLI %s
define <vscale x 1 x i32> @spill_lmul_mf2(<vscale x 1 x i32> %va) nounwind {
; SPILL-O0-LABEL: spill_lmul_mf2:
@@ -35,6 +39,37 @@ define <vscale x 1 x i32> @spill_lmul_mf2(<vscale x 1 x i32> %va) nounwind {
; SPILL-O2-NEXT: add sp, sp, a0
; SPILL-O2-NEXT: addi sp, sp, 16
; SPILL-O2-NEXT: ret
+;
+; SPILL-O0-VSETVLI-LABEL: spill_lmul_mf2:
+; SPILL-O0-VSETVLI: # %bb.0: # %entry
+; SPILL-O0-VSETVLI-NEXT: addi sp, sp, -16
+; SPILL-O0-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; SPILL-O0-VSETVLI-NEXT: sub sp, sp, a0
+; SPILL-O0-VSETVLI-NEXT: addi a0, sp, 16
+; SPILL-O0-VSETVLI-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
+; SPILL-O0-VSETVLI-NEXT: #APP
+; SPILL-O0-VSETVLI-NEXT: #NO_APP
+; SPILL-O0-VSETVLI-NEXT: addi a0, sp, 16
+; SPILL-O0-VSETVLI-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
+; SPILL-O0-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; SPILL-O0-VSETVLI-NEXT: add sp, sp, a0
+; SPILL-O0-VSETVLI-NEXT: addi sp, sp, 16
+; SPILL-O0-VSETVLI-NEXT: ret
+;
+; SPILL-O2-VSETVLI-LABEL: spill_lmul_mf2:
+; SPILL-O2-VSETVLI: # %bb.0: # %entry
+; SPILL-O2-VSETVLI-NEXT: addi sp, sp, -16
+; SPILL-O2-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; SPILL-O2-VSETVLI-NEXT: sub sp, sp, a0
+; SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16
+; SPILL-O2-VSETVLI-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
+; SPILL-O2-VSETVLI-NEXT: #APP
+; SPILL-O2-VSETVLI-NEXT: #NO_APP
+; SPILL-O2-VSETVLI-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
+; SPILL-O2-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; SPILL-O2-VSETVLI-NEXT: add sp, sp, a0
+; SPILL-O2-VSETVLI-NEXT: addi sp, sp, 16
+; SPILL-O2-VSETVLI-NEXT: ret
entry:
call void asm sideeffect "",
"~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"()
@@ -73,6 +108,37 @@ define <vscale x 2 x i32> @spill_lmul_1(<vscale x 2 x i32> %va) nounwind {
; SPILL-O2-NEXT: add sp, sp, a0
; SPILL-O2-NEXT: addi sp, sp, 16
; SPILL-O2-NEXT: ret
+;
+; SPILL-O0-VSETVLI-LABEL: spill_lmul_1:
+; SPILL-O0-VSETVLI: # %bb.0: # %entry
+; SPILL-O0-VSETVLI-NEXT: addi sp, sp, -16
+; SPILL-O0-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; SPILL-O0-VSETVLI-NEXT: sub sp, sp, a0
+; SPILL-O0-VSETVLI-NEXT: addi a0, sp, 16
+; SPILL-O0-VSETVLI-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
+; SPILL-O0-VSETVLI-NEXT: #APP
+; SPILL-O0-VSETVLI-NEXT: #NO_APP
+; SPILL-O0-VSETVLI-NEXT: addi a0, sp, 16
+; SPILL-O0-VSETVLI-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
+; SPILL-O0-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; SPILL-O0-VSETVLI-NEXT: add sp, sp, a0
+; SPILL-O0-VSETVLI-NEXT: addi sp, sp, 16
+; SPILL-O0-VSETVLI-NEXT: ret
+;
+; SPILL-O2-VSETVLI-LABEL: spill_lmul_1:
+; SPILL-O2-VSETVLI: # %bb.0: # %entry
+; SPILL-O2-VSETVLI-NEXT: addi sp, sp, -16
+; SPILL-O2-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; SPILL-O2-VSETVLI-NEXT: sub sp, sp, a0
+; SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16
+; SPILL-O2-VSETVLI-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
+; SPILL-O2-VSETVLI-NEXT: #APP
+; SPILL-O2-VSETVLI-NEXT: #NO_APP
+; SPILL-O2-VSETVLI-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
+; SPILL-O2-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; SPILL-O2-VSETVLI-NEXT: add sp, sp, a0
+; SPILL-O2-VSETVLI-NEXT: addi sp, sp, 16
+; SPILL-O2-VSETVLI-NEXT: ret
entry:
call void asm sideeffect "",
"~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"()
@@ -115,6 +181,37 @@ define <vscale x 4 x i32> @spill_lmul_2(<vscale x 4 x i32> %va) nounwind {
; SPILL-O2-NEXT: add sp, sp, a0
; SPILL-O2-NEXT: addi sp, sp, 16
; SPILL-O2-NEXT: ret
+;
+; SPILL-O0-VSETVLI-LABEL: spill_lmul_2:
+; SPILL-O0-VSETVLI: # %bb.0: # %entry
+; SPILL-O0-VSETVLI-NEXT: addi sp, sp, -16
+; SPILL-O0-VSETVLI-NEXT: vsetvli a0, zero, e8, m2, ta, ma
+; SPILL-O0-VSETVLI-NEXT: sub sp, sp, a0
+; SPILL-O0-VSETVLI-NEXT: addi a0, sp, 16
+; SPILL-O0-VSETVLI-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill
+; SPILL-O0-VSETVLI-NEXT: #APP
+; SPILL-O0-VSETVLI-NEXT: #NO_APP
+; SPILL-O0-VSETVLI-NEXT: addi a0, sp, 16
+; SPILL-O0-VSETVLI-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload
+; SPILL-O0-VSETVLI-NEXT: vsetvli a0, zero, e8, m2, ta, ma
+; SPILL-O0-VSETVLI-NEXT: add sp, sp, a0
+; SPILL-O0-VSETVLI-NEXT: addi sp, sp, 16
+; SPILL-O0-VSETVLI-NEXT: ret
+;
+; SPILL-O2-VSETVLI-LABEL: spill_lmul_2:
+; SPILL-O2-VSETVLI: # %bb.0: # %entry
+; SPILL-O2-VSETVLI-NEXT: addi sp, sp, -16
+; SPILL-O2-VSETVLI-NEXT: vsetvli a0, zero, e8, m2, ta, ma
+; SPILL-O2-VSETVLI-NEXT: sub sp, sp, a0
+; SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16
+; SPILL-O2-VSETVLI-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill
+; SPILL-O2-VSETVLI-NEXT: #APP
+; SPILL-O2-VSETVLI-NEXT: #NO_APP
+; SPILL-O2-VSETVLI-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload
+; SPILL-O2-VSETVLI-NEXT: vsetvli a0, zero, e8, m2, ta, ma
+; SPILL-O2-VSETVLI-NEXT: add sp, sp, a0
+; SPILL-O2-VSETVLI-NEXT: addi sp, sp, 16
+; SPILL-O2-VSETVLI-NEXT: ret
entry:
call void asm sideeffect "",
"~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"()
@@ -157,6 +254,37 @@ define <vscale x 8 x i32> @spill_lmul_4(<vscale x 8 x i32> %va) nounwind {
; SPILL-O2-NEXT: add sp, sp, a0
; SPILL-O2-NEXT: addi sp, sp, 16
; SPILL-O2-NEXT: ret
+;
+; SPILL-O0-VSETVLI-LABEL: spill_lmul_4:
+; SPILL-O0-VSETVLI: # %bb.0: # %entry
+; SPILL-O0-VSETVLI-NEXT: addi sp, sp, -16
+; SPILL-O0-VSETVLI-NEXT: vsetvli a0, zero, e8, m4, ta, ma
+; SPILL-O0-VSETVLI-NEXT: sub sp, sp, a0
+; SPILL-O0-VSETVLI-NEXT: addi a0, sp, 16
+; SPILL-O0-VSETVLI-NEXT: vs4r.v v8, (a0) # Unknown-size Folded Spill
+; SPILL-O0-VSETVLI-NEXT: #APP
+; SPILL-O0-VSETVLI-NEXT: #NO_APP
+; SPILL-O0-VSETVLI-NEXT: addi a0, sp, 16
+; SPILL-O0-VSETVLI-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload
+; SPILL-O0-VSETVLI-NEXT: vsetvli a0, zero, e8, m4, ta, ma
+; SPILL-O0-VSETVLI-NEXT: add sp, sp, a0
+; SPILL-O0-VSETVLI-NEXT: addi sp, sp, 16
+; SPILL-O0-VSETVLI-NEXT: ret
+;
+; SPILL-O2-VSETVLI-LABEL: spill_lmul_4:
+; SPILL-O2-VSETVLI: # %bb.0: # %entry
+; SPILL-O2-VSETVLI-NEXT: addi sp, sp, -16
+; SPILL-O2-VSETVLI-NEXT: vsetvli a0, zero, e8, m4, ta, ma
+; SPILL-O2-VSETVLI-NEXT: sub sp, sp, a0
+; SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16
+; SPILL-O2-VSETVLI-NEXT: vs4r.v v8, (a0) # Unknown-size Folded Spill
+; SPILL-O2-VSETVLI-NEXT: #APP
+; SPILL-O2-VSETVLI-NEXT: #NO_APP
+; SPILL-O2-VSETVLI-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload
+; SPILL-O2-VSETVLI-NEXT: vsetvli a0, zero, e8, m4, ta, ma
+; SPILL-O2-VSETVLI-NEXT: add sp, sp, a0
+; SPILL-O2-VSETVLI-NEXT: addi sp, sp, 16
+; SPILL-O2-VSETVLI-NEXT: ret
entry:
call void asm sideeffect "",
"~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"()
@@ -199,6 +327,37 @@ define <vscale x 16 x i32> @spill_lmul_8(<vscale x 16 x i32> %va) nounwind {
; SPILL-O2-NEXT: add sp, sp, a0
; SPILL-O2-NEXT: addi sp, sp, 16
; SPILL-O2-NEXT: ret
+;
+; SPILL-O0-VSETVLI-LABEL: spill_lmul_8:
+; SPILL-O0-VSETVLI: # %bb.0: # %entry
+; SPILL-O0-VSETVLI-NEXT: addi sp, sp, -16
+; SPILL-O0-VSETVLI-NEXT: vsetvli a0, zero, e8, m8, ta, ma
+; SPILL-O0-VSETVLI-NEXT: sub sp, sp, a0
+; SPILL-O0-VSETVLI-NEXT: addi a0, sp, 16
+; SPILL-O0-VSETVLI-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
+; SPILL-O0-VSETVLI-NEXT: #APP
+; SPILL-O0-VSETVLI-NEXT: #NO_APP
+; SPILL-O0-VSETVLI-NEXT: addi a0, sp, 16
+; SPILL-O0-VSETVLI-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
+; SPILL-O0-VSETVLI-NEXT: vsetvli a0, zero, e8, m8, ta, ma
+; SPILL-O0-VSETVLI-NEXT: add sp, sp, a0
+; SPILL-O0-VSETVLI-NEXT: addi sp, sp, 16
+; SPILL-O0-VSETVLI-NEXT: ret
+;
+; SPILL-O2-VSETVLI-LABEL: spill_lmul_8:
+; SPILL-O2-VSETVLI: # %bb.0: # %entry
+; SPILL-O2-VSETVLI-NEXT: addi sp, sp, -16
+; SPILL-O2-VSETVLI-NEXT: vsetvli a0, zero, e8, m8, ta, ma
+; SPILL-O2-VSETVLI-NEXT: sub sp, sp, a0
+; SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16
+; SPILL-O2-VSETVLI-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
+; SPILL-O2-VSETVLI-NEXT: #APP
+; SPILL-O2-VSETVLI-NEXT: #NO_APP
+; SPILL-O2-VSETVLI-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
+; SPILL-O2-VSETVLI-NEXT: vsetvli a0, zero, e8, m8, ta, ma
+; SPILL-O2-VSETVLI-NEXT: add sp, sp, a0
+; SPILL-O2-VSETVLI-NEXT: addi sp, sp, 16
+; SPILL-O2-VSETVLI-NEXT: ret
entry:
call void asm sideeffect "",
"~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"()
diff --git a/llvm/test/CodeGen/RISCV/rvv/rv32-spill-zvlsseg.ll b/llvm/test/CodeGen/RISCV/rvv/rv32-spill-zvlsseg.ll
index adb15f02e33a4..485015577b8af 100644
--- a/llvm/test/CodeGen/RISCV/rvv/rv32-spill-zvlsseg.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rv32-spill-zvlsseg.ll
@@ -5,6 +5,10 @@
; RUN: | FileCheck --check-prefix=SPILL-O2 %s
; RUN: llc -mtriple=riscv32 -mattr=+v -riscv-v-vector-bits-max=128 -O2 < %s \
; RUN: | FileCheck --check-prefix=SPILL-O2-VLEN128 %s
+; RUN: llc -mtriple=riscv32 -mattr=+v,+m,+prefer-vsetvli-over-read-vlenb -O0 < %s \
+; RUN: | FileCheck --check-prefix=SPILL-O0-VSETVLI %s
+; RUN: llc -mtriple=riscv32 -mattr=+v,+m,+prefer-vsetvli-over-read-vlenb -O2 < %s \
+; RUN: | FileCheck --check-prefix=SPILL-O2-VSETVLI %s
define <vscale x 1 x i32> @spill_zvlsseg_nxv1i32(ptr %base, i32 %vl) nounwind {
; SPILL-O0-LABEL: spill_zvlsseg_nxv1i32:
@@ -74,6 +78,50 @@ define <vscale x 1 x i32> @spill_zvlsseg_nxv1i32(ptr %base, i32 %vl) nounwind {
; SPILL-O2-VLEN128-NEXT: addi sp, sp, 32
; SPILL-O2-VLEN128-NEXT: addi sp, sp, 16
; SPILL-O2-VLEN128-NEXT: ret
+;
+; SPILL-O0-VSETVLI-LABEL: spill_zvlsseg_nxv1i32:
+; SPILL-O0-VSETVLI: # %bb.0: # %entry
+; SPILL-O0-VSETVLI-NEXT: addi sp, sp, -16
+; SPILL-O0-VSETVLI-NEXT: vsetvli a2, zero, e8, m1, ta, ma
+; SPILL-O0-VSETVLI-NEXT: sub sp, sp, a2
+; SPILL-O0-VSETVLI-NEXT: # implicit-def: $v8_v9
+; SPILL-O0-VSETVLI-NEXT: vsetvli zero, a1, e32, mf2, tu, ma
+; SPILL-O0-VSETVLI-NEXT: vlseg2e32.v v8, (a0)
+; SPILL-O0-VSETVLI-NEXT: vsetvli zero, a1, e32, mf2, tu, ma
+; SPILL-O0-VSETVLI-NEXT: vmv1r.v v8, v9
+; SPILL-O0-VSETVLI-NEXT: addi a0, sp, 16
+; SPILL-O0-VSETVLI-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
+; SPILL-O0-VSETVLI-NEXT: #APP
+; SPILL-O0-VSETVLI-NEXT: #NO_APP
+; SPILL-O0-VSETVLI-NEXT: addi a0, sp, 16
+; SPILL-O0-VSETVLI-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
+; SPILL-O0-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; SPILL-O0-VSETVLI-NEXT: add sp, sp, a0
+; SPILL-O0-VSETVLI-NEXT: addi sp, sp, 16
+; SPILL-O0-VSETVLI-NEXT: ret
+;
+; SPILL-O2-VSETVLI-LABEL: spill_zvlsseg_nxv1i32:
+; SPILL-O2-VSETVLI: # %bb.0: # %entry
+; SPILL-O2-VSETVLI-NEXT: addi sp, sp, -16
+; SPILL-O2-VSETVLI-NEXT: vsetvli a2, zero, e8, m2, ta, ma
+; SPILL-O2-VSETVLI-NEXT: sub sp, sp, a2
+; SPILL-O2-VSETVLI-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; SPILL-O2-VSETVLI-NEXT: vlseg2e32.v v8, (a0)
+; SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16
+; SPILL-O2-VSETVLI-NEXT: csrr a1, vlenb
+; SPILL-O2-VSETVLI-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
+; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1
+; SPILL-O2-VSETVLI-NEXT: vs1r.v v9, (a0) # Unknown-size Folded Spill
+; SPILL-O2-VSETVLI-NEXT: #APP
+; SPILL-O2-VSETVLI-NEXT: #NO_APP
+; SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16
+; SPILL-O2-VSETVLI-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1
+; SPILL-O2-VSETVLI-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
+; SPILL-O2-VSETVLI-NEXT: vsetvli a0, zero, e8, m2, ta, ma
+; SPILL-O2-VSETVLI-NEXT: add sp, sp, a0
+; SPILL-O2-VSETVLI-NEXT: addi sp, sp, 16
+; SPILL-O2-VSETVLI-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) undef, ptr %base, i32 %vl, i32 5)
call void asm sideeffect "",
@@ -150,6 +198,50 @@ define <vscale x 2 x i32> @spill_zvlsseg_nxv2i32(ptr %base, i32 %vl) nounwind {
; SPILL-O2-VLEN128-NEXT: addi sp, sp, 32
; SPILL-O2-VLEN128-NEXT: addi sp, sp, 16
; SPILL-O2-VLEN128-NEXT: ret
+;
+; SPILL-O0-VSETVLI-LABEL: spill_zvlsseg_nxv2i32:
+; SPILL-O0-VSETVLI: # %bb.0: # %entry
+; SPILL-O0-VSETVLI-NEXT: addi sp, sp, -16
+; SPILL-O0-VSETVLI-NEXT: vsetvli a2, zero, e8, m1, ta, ma
+; SPILL-O0-VSETVLI-NEXT: sub sp, sp, a2
+; SPILL-O0-VSETVLI-NEXT: # implicit-def: $v8_v9
+; SPILL-O0-VSETVLI-NEXT: vsetvli zero, a1, e32, m1, tu, ma
+; SPILL-O0-VSETVLI-NEXT: vlseg2e32.v v8, (a0)
+; SPILL-O0-VSETVLI-NEXT: vsetvli zero, a1, e32, m1, tu, ma
+; SPILL-O0-VSETVLI-NEXT: vmv1r.v v8, v9
+; SPILL-O0-VSETVLI-NEXT: addi a0, sp, 16
+; SPILL-O0-VSETVLI-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
+; SPILL-O0-VSETVLI-NEXT: #APP
+; SPILL-O0-VSETVLI-NEXT: #NO_APP
+; SPILL-O0-VSETVLI-NEXT: addi a0, sp, 16
+; SPILL-O0-VSETVLI-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
+; SPILL-O0-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; SPILL-O0-VSETVLI-NEXT: add sp, sp, a0
+; SPILL-O0-VSETVLI-NEXT: addi sp, sp, 16
+; SPILL-O0-VSETVLI-NEXT: ret
+;
+; SPILL-O2-VSETVLI-LABEL: spill_zvlsseg_nxv2i32:
+; SPILL-O2-VSETVLI: # %bb.0: # %entry
+; SPILL-O2-VSETVLI-NEXT: addi sp, sp, -16
+; SPILL-O2-VSETVLI-NEXT: vsetvli a2, zero, e8, m2, ta, ma
+; SPILL-O2-VSETVLI-NEXT: sub sp, sp, a2
+; SPILL-O2-VSETVLI-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; SPILL-O2-VSETVLI-NEXT: vlseg2e32.v v8, (a0)
+; SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16
+; SPILL-O2-VSETVLI-NEXT: csrr a1, vlenb
+; SPILL-O2-VSETVLI-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
+; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1
+; SPILL-O2-VSETVLI-NEXT: vs1r.v v9, (a0) # Unknown-size Folded Spill
+; SPILL-O2-VSETVLI-NEXT: #APP
+; SPILL-O2-VSETVLI-NEXT: #NO_APP
+; SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16
+; SPILL-O2-VSETVLI-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1
+; SPILL-O2-VSETVLI-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
+; SPILL-O2-VSETVLI-NEXT: vsetvli a0, zero, e8, m2, ta, ma
+; SPILL-O2-VSETVLI-NEXT: add sp, sp, a0
+; SPILL-O2-VSETVLI-NEXT: addi sp, sp, 16
+; SPILL-O2-VSETVLI-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) undef, ptr %base, i32 %vl, i32 5)
call void asm sideeffect "",
@@ -231,6 +323,53 @@ define <vscale x 4 x i32> @spill_zvlsseg_nxv4i32(ptr %base, i32 %vl) nounwind {
; SPILL-O2-VLEN128-NEXT: addi sp, sp, 64
; SPILL-O2-VLEN128-NEXT: addi sp, sp, 16
; SPILL-O2-VLEN128-NEXT: ret
+;
+; SPILL-O0-VSETVLI-LABEL: spill_zvlsseg_nxv4i32:
+; SPILL-O0-VSETVLI: # %bb.0: # %entry
+; SPILL-O0-VSETVLI-NEXT: addi sp, sp, -16
+; SPILL-O0-VSETVLI-NEXT: vsetvli a2, zero, e8, m2, ta, ma
+; SPILL-O0-VSETVLI-NEXT: sub sp, sp, a2
+; SPILL-O0-VSETVLI-NEXT: # implicit-def: $v8m2_v10m2
+; SPILL-O0-VSETVLI-NEXT: vsetvli zero, a1, e32, m2, tu, ma
+; SPILL-O0-VSETVLI-NEXT: vlseg2e32.v v8, (a0)
+; SPILL-O0-VSETVLI-NEXT: vsetvli zero, a1, e32, m2, tu, ma
+; SPILL-O0-VSETVLI-NEXT: vmv2r.v v8, v10
+; SPILL-O0-VSETVLI-NEXT: addi a0, sp, 16
+; SPILL-O0-VSETVLI-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill
+; SPILL-O0-VSETVLI-NEXT: #APP
+; SPILL-O0-VSETVLI-NEXT: #NO_APP
+; SPILL-O0-VSETVLI-NEXT: addi a0, sp, 16
+; SPILL-O0-VSETVLI-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload
+; SPILL-O0-VSETVLI-NEXT: vsetvli a0, zero, e8, m2, ta, ma
+; SPILL-O0-VSETVLI-NEXT: add sp, sp, a0
+; SPILL-O0-VSETVLI-NEXT: addi sp, sp, 16
+; SPILL-O0-VSETVLI-NEXT: ret
+;
+; SPILL-O2-VSETVLI-LABEL: spill_zvlsseg_nxv4i32:
+; SPILL-O2-VSETVLI: # %bb.0: # %entry
+; SPILL-O2-VSETVLI-NEXT: addi sp, sp, -16
+; SPILL-O2-VSETVLI-NEXT: vsetvli a2, zero, e8, m4, ta, ma
+; SPILL-O2-VSETVLI-NEXT: sub sp, sp, a2
+; SPILL-O2-VSETVLI-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; SPILL-O2-VSETVLI-NEXT: vlseg2e32.v v8, (a0)
+; SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16
+; SPILL-O2-VSETVLI-NEXT: csrr a1, vlenb
+; SPILL-O2-VSETVLI-NEXT: slli a1, a1, 1
+; SPILL-O2-VSETVLI-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill
+; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1
+; SPILL-O2-VSETVLI-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill
+; SPILL-O2-VSETVLI-NEXT: #APP
+; SPILL-O2-VSETVLI-NEXT: #NO_APP
+; SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16
+; SPILL-O2-VSETVLI-NEXT: csrr a1, vlenb
+; SPILL-O2-VSETVLI-NEXT: slli a1, a1, 1
+; SPILL-O2-VSETVLI-NEXT: vl2r.v v6, (a0) # Unknown-size Folded Reload
+; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1
+; SPILL-O2-VSETVLI-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload
+; SPILL-O2-VSETVLI-NEXT: vsetvli a0, zero, e8, m4, ta, ma
+; SPILL-O2-VSETVLI-NEXT: add sp, sp, a0
+; SPILL-O2-VSETVLI-NEXT: addi sp, sp, 16
+; SPILL-O2-VSETVLI-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, i32 %vl, i32 5)
call void asm sideeffect "",
@@ -312,6 +451,53 @@ define <vscale x 8 x i32> @spill_zvlsseg_nxv8i32(ptr %base, i32 %vl) nounwind {
; SPILL-O2-VLEN128-NEXT: addi sp, sp, 128
; SPILL-O2-VLEN128-NEXT: addi sp, sp, 16
; SPILL-O2-VLEN128-NEXT: ret
+;
+; SPILL-O0-VSETVLI-LABEL: spill_zvlsseg_nxv8i32:
+; SPILL-O0-VSETVLI: # %bb.0: # %entry
+; SPILL-O0-VSETVLI-NEXT: addi sp, sp, -16
+; SPILL-O0-VSETVLI-NEXT: vsetvli a2, zero, e8, m4, ta, ma
+; SPILL-O0-VSETVLI-NEXT: sub sp, sp, a2
+; SPILL-O0-VSETVLI-NEXT: # implicit-def: $v8m4_v12m4
+; SPILL-O0-VSETVLI-NEXT: vsetvli zero, a1, e32, m4, tu, ma
+; SPILL-O0-VSETVLI-NEXT: vlseg2e32.v v8, (a0)
+; SPILL-O0-VSETVLI-NEXT: vsetvli zero, a1, e32, m4, tu, ma
+; SPILL-O0-VSETVLI-NEXT: vmv4r.v v8, v12
+; SPILL-O0-VSETVLI-NEXT: addi a0, sp, 16
+; SPILL-O0-VSETVLI-NEXT: vs4r.v v8, (a0) # Unknown-size Folded Spill
+; SPILL-O0-VSETVLI-NEXT: #APP
+; SPILL-O0-VSETVLI-NEXT: #NO_APP
+; SPILL-O0-VSETVLI-NEXT: addi a0, sp, 16
+; SPILL-O0-VSETVLI-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload
+; SPILL-O0-VSETVLI-NEXT: vsetvli a0, zero, e8, m4, ta, ma
+; SPILL-O0-VSETVLI-NEXT: add sp, sp, a0
+; SPILL-O0-VSETVLI-NEXT: addi sp, sp, 16
+; SPILL-O0-VSETVLI-NEXT: ret
+;
+; SPILL-O2-VSETVLI-LABEL: spill_zvlsseg_nxv8i32:
+; SPILL-O2-VSETVLI: # %bb.0: # %entry
+; SPILL-O2-VSETVLI-NEXT: addi sp, sp, -16
+; SPILL-O2-VSETVLI-NEXT: vsetvli a2, zero, e8, m8, ta, ma
+; SPILL-O2-VSETVLI-NEXT: sub sp, sp, a2
+; SPILL-O2-VSETVLI-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; SPILL-O2-VSETVLI-NEXT: vlseg2e32.v v8, (a0)
+; SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16
+; SPILL-O2-VSETVLI-NEXT: csrr a1, vlenb
+; SPILL-O2-VSETVLI-NEXT: slli a1, a1, 2
+; SPILL-O2-VSETVLI-NEXT: vs4r.v v8, (a0) # Unknown-size Folded Spill
+; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1
+; SPILL-O2-VSETVLI-NEXT: vs4r.v v12, (a0) # Unknown-size Folded Spill
+; SPILL-O2-VSETVLI-NEXT: #APP
+; SPILL-O2-VSETVLI-NEXT: #NO_APP
+; SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16
+; SPILL-O2-VSETVLI-NEXT: csrr a1, vlenb
+; SPILL-O2-VSETVLI-NEXT: slli a1, a1, 2
+; SPILL-O2-VSETVLI-NEXT: vl4r.v v4, (a0) # Unknown-size Folded Reload
+; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1
+; SPILL-O2-VSETVLI-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload
+; SPILL-O2-VSETVLI-NEXT: vsetvli a0, zero, e8, m8, ta, ma
+; SPILL-O2-VSETVLI-NEXT: add sp, sp, a0
+; SPILL-O2-VSETVLI-NEXT: addi sp, sp, 16
+; SPILL-O2-VSETVLI-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, i32 %vl, i32 5)
call void asm sideeffect "",
@@ -403,6 +589,61 @@ define <vscale x 4 x i32> @spill_zvlsseg3_nxv4i32(ptr %base, i32 %vl) nounwind {
; SPILL-O2-VLEN128-NEXT: addi sp, sp, 96
; SPILL-O2-VLEN128-NEXT: addi sp, sp, 16
; SPILL-O2-VLEN128-NEXT: ret
+;
+; SPILL-O0-VSETVLI-LABEL: spill_zvlsseg3_nxv4i32:
+; SPILL-O0-VSETVLI: # %bb.0: # %entry
+; SPILL-O0-VSETVLI-NEXT: addi sp, sp, -16
+; SPILL-O0-VSETVLI-NEXT: vsetvli a2, zero, e8, m2, ta, ma
+; SPILL-O0-VSETVLI-NEXT: sub sp, sp, a2
+; SPILL-O0-VSETVLI-NEXT: # implicit-def: $v8m2_v10m2_v12m2
+; SPILL-O0-VSETVLI-NEXT: vsetvli zero, a1, e32, m2, tu, ma
+; SPILL-O0-VSETVLI-NEXT: vlseg3e32.v v8, (a0)
+; SPILL-O0-VSETVLI-NEXT: vsetvli zero, a1, e32, m2, tu, ma
+; SPILL-O0-VSETVLI-NEXT: vmv2r.v v8, v10
+; SPILL-O0-VSETVLI-NEXT: addi a0, sp, 16
+; SPILL-O0-VSETVLI-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill
+; SPILL-O0-VSETVLI-NEXT: #APP
+; SPILL-O0-VSETVLI-NEXT: #NO_APP
+; SPILL-O0-VSETVLI-NEXT: addi a0, sp, 16
+; SPILL-O0-VSETVLI-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload
+; SPILL-O0-VSETVLI-NEXT: vsetvli a0, zero, e8, m2, ta, ma
+; SPILL-O0-VSETVLI-NEXT: add sp, sp, a0
+; SPILL-O0-VSETVLI-NEXT: addi sp, sp, 16
+; SPILL-O0-VSETVLI-NEXT: ret
+;
+; SPILL-O2-VSETVLI-LABEL: spill_zvlsseg3_nxv4i32:
+; SPILL-O2-VSETVLI: # %bb.0: # %entry
+; SPILL-O2-VSETVLI-NEXT: addi sp, sp, -16
+; SPILL-O2-VSETVLI-NEXT: vsetvli a2, zero, e8, m1, ta, ma
+; SPILL-O2-VSETVLI-NEXT: li a3, 6
+; SPILL-O2-VSETVLI-NEXT: mul a2, a2, a3
+; SPILL-O2-VSETVLI-NEXT: sub sp, sp, a2
+; SPILL-O2-VSETVLI-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; SPILL-O2-VSETVLI-NEXT: vlseg3e32.v v8, (a0)
+; SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16
+; SPILL-O2-VSETVLI-NEXT: csrr a1, vlenb
+; SPILL-O2-VSETVLI-NEXT: slli a1, a1, 1
+; SPILL-O2-VSETVLI-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill
+; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1
+; SPILL-O2-VSETVLI-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill
+; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1
+; SPILL-O2-VSETVLI-NEXT: vs2r.v v12, (a0) # Unknown-size Folded Spill
+; SPILL-O2-VSETVLI-NEXT: #APP
+; SPILL-O2-VSETVLI-NEXT: #NO_APP
+; SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16
+; SPILL-O2-VSETVLI-NEXT: csrr a1, vlenb
+; SPILL-O2-VSETVLI-NEXT: slli a1, a1, 1
+; SPILL-O2-VSETVLI-NEXT: vl2r.v v6, (a0) # Unknown-size Folded Reload
+; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1
+; SPILL-O2-VSETVLI-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload
+; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1
+; SPILL-O2-VSETVLI-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
+; SPILL-O2-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; SPILL-O2-VSETVLI-NEXT: li a1, 6
+; SPILL-O2-VSETVLI-NEXT: mul a0, a0, a1
+; SPILL-O2-VSETVLI-NEXT: add sp, sp, a0
+; SPILL-O2-VSETVLI-NEXT: addi sp, sp, 16
+; SPILL-O2-VSETVLI-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, i32 %vl, i32 5)
call void asm sideeffect "",
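One case above where a single `vsetvli` cannot produce the frame size is `spill_zvlsseg3_nxv4i32`: three m2 groups need 6 * VLENB, and LMUL only takes power-of-two values, so the O2 output falls back to scaling an m1 read with a multiply (hence the `+m` in the new RUN lines for this file). A sketch of that sequence:

  vsetvli a2, zero, e8, m1, ta, ma   # a2 = vlenb
  li      a3, 6                      # 3 fields x LMUL 2
  mul     a2, a2, a3                 # a2 = 6 * vlenb
  sub     sp, sp, a2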
diff --git a/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector-csr.ll b/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector-csr.ll
index 2cd80ef79bd82..e885f25cf0b26 100644
--- a/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector-csr.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector-csr.ll
@@ -7,6 +7,12 @@
; RUN: | FileCheck --check-prefix=SPILL-O2-VLEN128 %s
; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zcmp -O2 < %s \
; RUN: | FileCheck --check-prefix=SPILL-O2-ZCMP %s
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+prefer-vsetvli-over-read-vlenb -O0 < %s \
+; RUN: | FileCheck --check-prefix=SPILL-O0-VSETVLI %s
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+prefer-vsetvli-over-read-vlenb -O2 < %s \
+; RUN: | FileCheck --check-prefix=SPILL-O2-VSETVLI %s
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zcmp,+prefer-vsetvli-over-read-vlenb -O2 < %s \
+; RUN: | FileCheck --check-prefix=SPILL-O2-ZCMP-VSETVLI %s
@.str = private unnamed_addr constant [6 x i8] c"hello\00", align 1
@@ -147,6 +153,108 @@ define <vscale x 1 x double> @foo(<vscale x 1 x double> %a, <vscale x 1 x double
; SPILL-O2-ZCMP-NEXT: slli a0, a0, 1
; SPILL-O2-ZCMP-NEXT: add sp, sp, a0
; SPILL-O2-ZCMP-NEXT: cm.popret {ra, s0}, 32
+;
+; SPILL-O0-VSETVLI-LABEL: foo:
+; SPILL-O0-VSETVLI: # %bb.0:
+; SPILL-O0-VSETVLI-NEXT: addi sp, sp, -48
+; SPILL-O0-VSETVLI-NEXT: sd ra, 40(sp) # 8-byte Folded Spill
+; SPILL-O0-VSETVLI-NEXT: vsetvli a1, zero, e8, m2, ta, ma
+; SPILL-O0-VSETVLI-NEXT: sub sp, sp, a1
+; SPILL-O0-VSETVLI-NEXT: sd a0, 16(sp) # 8-byte Folded Spill
+; SPILL-O0-VSETVLI-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; SPILL-O0-VSETVLI-NEXT: vmv1r.v v10, v9
+; SPILL-O0-VSETVLI-NEXT: vmv1r.v v9, v8
+; SPILL-O0-VSETVLI-NEXT: csrr a1, vlenb
+; SPILL-O0-VSETVLI-NEXT: add a1, sp, a1
+; SPILL-O0-VSETVLI-NEXT: addi a1, a1, 32
+; SPILL-O0-VSETVLI-NEXT: vs1r.v v9, (a1) # Unknown-size Folded Spill
+; SPILL-O0-VSETVLI-NEXT: # implicit-def: $v8
+; SPILL-O0-VSETVLI-NEXT: vsetvli zero, a0, e64, m1, tu, ma
+; SPILL-O0-VSETVLI-NEXT: vfadd.vv v8, v9, v10
+; SPILL-O0-VSETVLI-NEXT: addi a0, sp, 32
+; SPILL-O0-VSETVLI-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
+; SPILL-O0-VSETVLI-NEXT: lui a0, %hi(.L.str)
+; SPILL-O0-VSETVLI-NEXT: addi a0, a0, %lo(.L.str)
+; SPILL-O0-VSETVLI-NEXT: call puts
+; SPILL-O0-VSETVLI-NEXT: addi a1, sp, 32
+; SPILL-O0-VSETVLI-NEXT: vl1r.v v10, (a1) # Unknown-size Folded Reload
+; SPILL-O0-VSETVLI-NEXT: csrr a1, vlenb
+; SPILL-O0-VSETVLI-NEXT: add a1, sp, a1
+; SPILL-O0-VSETVLI-NEXT: addi a1, a1, 32
+; SPILL-O0-VSETVLI-NEXT: vl1r.v v9, (a1) # Unknown-size Folded Reload
+; SPILL-O0-VSETVLI-NEXT: # kill: def $x11 killed $x10
+; SPILL-O0-VSETVLI-NEXT: ld a0, 16(sp) # 8-byte Folded Reload
+; SPILL-O0-VSETVLI-NEXT: # implicit-def: $v8
+; SPILL-O0-VSETVLI-NEXT: vsetvli zero, a0, e64, m1, tu, ma
+; SPILL-O0-VSETVLI-NEXT: vfadd.vv v8, v9, v10
+; SPILL-O0-VSETVLI-NEXT: vsetvli a0, zero, e8, m2, ta, ma
+; SPILL-O0-VSETVLI-NEXT: add sp, sp, a0
+; SPILL-O0-VSETVLI-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
+; SPILL-O0-VSETVLI-NEXT: addi sp, sp, 48
+; SPILL-O0-VSETVLI-NEXT: ret
+;
+; SPILL-O2-VSETVLI-LABEL: foo:
+; SPILL-O2-VSETVLI: # %bb.0:
+; SPILL-O2-VSETVLI-NEXT: addi sp, sp, -32
+; SPILL-O2-VSETVLI-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; SPILL-O2-VSETVLI-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; SPILL-O2-VSETVLI-NEXT: vsetvli a1, zero, e8, m2, ta, ma
+; SPILL-O2-VSETVLI-NEXT: sub sp, sp, a1
+; SPILL-O2-VSETVLI-NEXT: mv s0, a0
+; SPILL-O2-VSETVLI-NEXT: addi a1, sp, 16
+; SPILL-O2-VSETVLI-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill
+; SPILL-O2-VSETVLI-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; SPILL-O2-VSETVLI-NEXT: vfadd.vv v9, v8, v9
+; SPILL-O2-VSETVLI-NEXT: csrr a0, vlenb
+; SPILL-O2-VSETVLI-NEXT: add a0, sp, a0
+; SPILL-O2-VSETVLI-NEXT: addi a0, a0, 16
+; SPILL-O2-VSETVLI-NEXT: vs1r.v v9, (a0) # Unknown-size Folded Spill
+; SPILL-O2-VSETVLI-NEXT: lui a0, %hi(.L.str)
+; SPILL-O2-VSETVLI-NEXT: addi a0, a0, %lo(.L.str)
+; SPILL-O2-VSETVLI-NEXT: call puts
+; SPILL-O2-VSETVLI-NEXT: csrr a0, vlenb
+; SPILL-O2-VSETVLI-NEXT: add a0, sp, a0
+; SPILL-O2-VSETVLI-NEXT: addi a0, a0, 16
+; SPILL-O2-VSETVLI-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
+; SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16
+; SPILL-O2-VSETVLI-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload
+; SPILL-O2-VSETVLI-NEXT: vsetvli zero, s0, e64, m1, ta, ma
+; SPILL-O2-VSETVLI-NEXT: vfadd.vv v8, v9, v8
+; SPILL-O2-VSETVLI-NEXT: vsetvli a0, zero, e8, m2, ta, ma
+; SPILL-O2-VSETVLI-NEXT: add sp, sp, a0
+; SPILL-O2-VSETVLI-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; SPILL-O2-VSETVLI-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; SPILL-O2-VSETVLI-NEXT: addi sp, sp, 32
+; SPILL-O2-VSETVLI-NEXT: ret
+;
+; SPILL-O2-ZCMP-VSETVLI-LABEL: foo:
+; SPILL-O2-ZCMP-VSETVLI: # %bb.0:
+; SPILL-O2-ZCMP-VSETVLI-NEXT: cm.push {ra, s0}, -32
+; SPILL-O2-ZCMP-VSETVLI-NEXT: vsetvli a1, zero, e8, m2, ta, ma
+; SPILL-O2-ZCMP-VSETVLI-NEXT: sub sp, sp, a1
+; SPILL-O2-ZCMP-VSETVLI-NEXT: mv s0, a0
+; SPILL-O2-ZCMP-VSETVLI-NEXT: addi a1, sp, 16
+; SPILL-O2-ZCMP-VSETVLI-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill
+; SPILL-O2-ZCMP-VSETVLI-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; SPILL-O2-ZCMP-VSETVLI-NEXT: vfadd.vv v9, v8, v9
+; SPILL-O2-ZCMP-VSETVLI-NEXT: csrr a0, vlenb
+; SPILL-O2-ZCMP-VSETVLI-NEXT: add a0, a0, sp
+; SPILL-O2-ZCMP-VSETVLI-NEXT: addi a0, a0, 16
+; SPILL-O2-ZCMP-VSETVLI-NEXT: vs1r.v v9, (a0) # Unknown-size Folded Spill
+; SPILL-O2-ZCMP-VSETVLI-NEXT: lui a0, %hi(.L.str)
+; SPILL-O2-ZCMP-VSETVLI-NEXT: addi a0, a0, %lo(.L.str)
+; SPILL-O2-ZCMP-VSETVLI-NEXT: call puts
+; SPILL-O2-ZCMP-VSETVLI-NEXT: csrr a0, vlenb
+; SPILL-O2-ZCMP-VSETVLI-NEXT: add a0, a0, sp
+; SPILL-O2-ZCMP-VSETVLI-NEXT: addi a0, a0, 16
+; SPILL-O2-ZCMP-VSETVLI-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
+; SPILL-O2-ZCMP-VSETVLI-NEXT: addi a0, sp, 16
+; SPILL-O2-ZCMP-VSETVLI-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload
+; SPILL-O2-ZCMP-VSETVLI-NEXT: vsetvli zero, s0, e64, m1, ta, ma
+; SPILL-O2-ZCMP-VSETVLI-NEXT: vfadd.vv v8, v9, v8
+; SPILL-O2-ZCMP-VSETVLI-NEXT: vsetvli a0, zero, e8, m2, ta, ma
+; SPILL-O2-ZCMP-VSETVLI-NEXT: add sp, sp, a0
+; SPILL-O2-ZCMP-VSETVLI-NEXT: cm.popret {ra, s0}, 32
{
%x = call <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 7, i64 %gvl)
%call = call signext i32 @puts(ptr @.str)
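In the Zcmp variant above, the push/pop instructions absorb the scalar part of the frame, and the vector area is layered on top with the same vsetvli-based adjustment; the epilogue therefore undoes the vector area before `cm.popret` restores the scalar frame. As a sketch:

  cm.push {ra, s0}, -32               # Zcmp: save ra/s0, sp -= 32
  vsetvli a1, zero, e8, m2, ta, ma    # a1 = 2 * vlenb
  sub     sp, sp, a1                  # vector spill area on top
  # ... body ...
  vsetvli a0, zero, e8, m2, ta, ma
  add     sp, sp, a0                  # release the vector area first
  cm.popret {ra, s0}, 32              # then restore ra/s0 and return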
diff --git a/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector.ll b/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector.ll
index 957a23f0069b8..c8397a2e57317 100644
--- a/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector.ll
@@ -5,6 +5,10 @@
; RUN: | FileCheck --check-prefix=SPILL-O2 %s
; RUN: llc -mtriple=riscv64 -mattr=+v,+d -mattr=+d -riscv-v-vector-bits-max=128 -O2 < %s \
; RUN: | FileCheck --check-prefix=SPILL-O2-VLEN128 %s
+; RUN: llc -mtriple=riscv64 -mattr=+v,+prefer-vsetvli-over-read-vlenb -O0 < %s \
+; RUN: | FileCheck --check-prefix=SPILL-O0-VSETVLI %s
+; RUN: llc -mtriple=riscv64 -mattr=+v,+prefer-vsetvli-over-read-vlenb -O2 < %s \
+; RUN: | FileCheck --check-prefix=SPILL-O2-VSETVLI %s
define <vscale x 1 x i64> @spill_lmul_1(<vscale x 1 x i64> %va) nounwind {
; SPILL-O0-LABEL: spill_lmul_1:
@@ -50,6 +54,37 @@ define <vscale x 1 x i64> @spill_lmul_1(<vscale x 1 x i64> %va) nounwind {
; SPILL-O2-VLEN128-NEXT: addi sp, sp, 16
; SPILL-O2-VLEN128-NEXT: addi sp, sp, 16
; SPILL-O2-VLEN128-NEXT: ret
+;
+; SPILL-O0-VSETVLI-LABEL: spill_lmul_1:
+; SPILL-O0-VSETVLI: # %bb.0: # %entry
+; SPILL-O0-VSETVLI-NEXT: addi sp, sp, -16
+; SPILL-O0-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; SPILL-O0-VSETVLI-NEXT: sub sp, sp, a0
+; SPILL-O0-VSETVLI-NEXT: addi a0, sp, 16
+; SPILL-O0-VSETVLI-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
+; SPILL-O0-VSETVLI-NEXT: #APP
+; SPILL-O0-VSETVLI-NEXT: #NO_APP
+; SPILL-O0-VSETVLI-NEXT: addi a0, sp, 16
+; SPILL-O0-VSETVLI-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
+; SPILL-O0-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; SPILL-O0-VSETVLI-NEXT: add sp, sp, a0
+; SPILL-O0-VSETVLI-NEXT: addi sp, sp, 16
+; SPILL-O0-VSETVLI-NEXT: ret
+;
+; SPILL-O2-VSETVLI-LABEL: spill_lmul_1:
+; SPILL-O2-VSETVLI: # %bb.0: # %entry
+; SPILL-O2-VSETVLI-NEXT: addi sp, sp, -16
+; SPILL-O2-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; SPILL-O2-VSETVLI-NEXT: sub sp, sp, a0
+; SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16
+; SPILL-O2-VSETVLI-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
+; SPILL-O2-VSETVLI-NEXT: #APP
+; SPILL-O2-VSETVLI-NEXT: #NO_APP
+; SPILL-O2-VSETVLI-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
+; SPILL-O2-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; SPILL-O2-VSETVLI-NEXT: add sp, sp, a0
+; SPILL-O2-VSETVLI-NEXT: addi sp, sp, 16
+; SPILL-O2-VSETVLI-NEXT: ret
entry:
call void asm sideeffect "",
"~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"()
@@ -105,6 +140,37 @@ define <vscale x 2 x i64> @spill_lmul_2(<vscale x 2 x i64> %va) nounwind {
; SPILL-O2-VLEN128-NEXT: addi sp, sp, 32
; SPILL-O2-VLEN128-NEXT: addi sp, sp, 16
; SPILL-O2-VLEN128-NEXT: ret
+;
+; SPILL-O0-VSETVLI-LABEL: spill_lmul_2:
+; SPILL-O0-VSETVLI: # %bb.0: # %entry
+; SPILL-O0-VSETVLI-NEXT: addi sp, sp, -16
+; SPILL-O0-VSETVLI-NEXT: vsetvli a0, zero, e8, m2, ta, ma
+; SPILL-O0-VSETVLI-NEXT: sub sp, sp, a0
+; SPILL-O0-VSETVLI-NEXT: addi a0, sp, 16
+; SPILL-O0-VSETVLI-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill
+; SPILL-O0-VSETVLI-NEXT: #APP
+; SPILL-O0-VSETVLI-NEXT: #NO_APP
+; SPILL-O0-VSETVLI-NEXT: addi a0, sp, 16
+; SPILL-O0-VSETVLI-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload
+; SPILL-O0-VSETVLI-NEXT: vsetvli a0, zero, e8, m2, ta, ma
+; SPILL-O0-VSETVLI-NEXT: add sp, sp, a0
+; SPILL-O0-VSETVLI-NEXT: addi sp, sp, 16
+; SPILL-O0-VSETVLI-NEXT: ret
+;
+; SPILL-O2-VSETVLI-LABEL: spill_lmul_2:
+; SPILL-O2-VSETVLI: # %bb.0: # %entry
+; SPILL-O2-VSETVLI-NEXT: addi sp, sp, -16
+; SPILL-O2-VSETVLI-NEXT: vsetvli a0, zero, e8, m2, ta, ma
+; SPILL-O2-VSETVLI-NEXT: sub sp, sp, a0
+; SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16
+; SPILL-O2-VSETVLI-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill
+; SPILL-O2-VSETVLI-NEXT: #APP
+; SPILL-O2-VSETVLI-NEXT: #NO_APP
+; SPILL-O2-VSETVLI-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload
+; SPILL-O2-VSETVLI-NEXT: vsetvli a0, zero, e8, m2, ta, ma
+; SPILL-O2-VSETVLI-NEXT: add sp, sp, a0
+; SPILL-O2-VSETVLI-NEXT: addi sp, sp, 16
+; SPILL-O2-VSETVLI-NEXT: ret
entry:
call void asm sideeffect "",
"~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"()
@@ -160,6 +226,37 @@ define <vscale x 4 x i64> @spill_lmul_4(<vscale x 4 x i64> %va) nounwind {
; SPILL-O2-VLEN128-NEXT: addi sp, sp, 64
; SPILL-O2-VLEN128-NEXT: addi sp, sp, 16
; SPILL-O2-VLEN128-NEXT: ret
+;
+; SPILL-O0-VSETVLI-LABEL: spill_lmul_4:
+; SPILL-O0-VSETVLI: # %bb.0: # %entry
+; SPILL-O0-VSETVLI-NEXT: addi sp, sp, -16
+; SPILL-O0-VSETVLI-NEXT: vsetvli a0, zero, e8, m4, ta, ma
+; SPILL-O0-VSETVLI-NEXT: sub sp, sp, a0
+; SPILL-O0-VSETVLI-NEXT: addi a0, sp, 16
+; SPILL-O0-VSETVLI-NEXT: vs4r.v v8, (a0) # Unknown-size Folded Spill
+; SPILL-O0-VSETVLI-NEXT: #APP
+; SPILL-O0-VSETVLI-NEXT: #NO_APP
+; SPILL-O0-VSETVLI-NEXT: addi a0, sp, 16
+; SPILL-O0-VSETVLI-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload
+; SPILL-O0-VSETVLI-NEXT: vsetvli a0, zero, e8, m4, ta, ma
+; SPILL-O0-VSETVLI-NEXT: add sp, sp, a0
+; SPILL-O0-VSETVLI-NEXT: addi sp, sp, 16
+; SPILL-O0-VSETVLI-NEXT: ret
+;
+; SPILL-O2-VSETVLI-LABEL: spill_lmul_4:
+; SPILL-O2-VSETVLI: # %bb.0: # %entry
+; SPILL-O2-VSETVLI-NEXT: addi sp, sp, -16
+; SPILL-O2-VSETVLI-NEXT: vsetvli a0, zero, e8, m4, ta, ma
+; SPILL-O2-VSETVLI-NEXT: sub sp, sp, a0
+; SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16
+; SPILL-O2-VSETVLI-NEXT: vs4r.v v8, (a0) # Unknown-size Folded Spill
+; SPILL-O2-VSETVLI-NEXT: #APP
+; SPILL-O2-VSETVLI-NEXT: #NO_APP
+; SPILL-O2-VSETVLI-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload
+; SPILL-O2-VSETVLI-NEXT: vsetvli a0, zero, e8, m4, ta, ma
+; SPILL-O2-VSETVLI-NEXT: add sp, sp, a0
+; SPILL-O2-VSETVLI-NEXT: addi sp, sp, 16
+; SPILL-O2-VSETVLI-NEXT: ret
entry:
call void asm sideeffect "",
"~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"()
@@ -215,6 +312,37 @@ define <vscale x 8 x i64> @spill_lmul_8(<vscale x 8 x i64> %va) nounwind {
; SPILL-O2-VLEN128-NEXT: addi sp, sp, 128
; SPILL-O2-VLEN128-NEXT: addi sp, sp, 16
; SPILL-O2-VLEN128-NEXT: ret
+;
+; SPILL-O0-VSETVLI-LABEL: spill_lmul_8:
+; SPILL-O0-VSETVLI: # %bb.0: # %entry
+; SPILL-O0-VSETVLI-NEXT: addi sp, sp, -16
+; SPILL-O0-VSETVLI-NEXT: vsetvli a0, zero, e8, m8, ta, ma
+; SPILL-O0-VSETVLI-NEXT: sub sp, sp, a0
+; SPILL-O0-VSETVLI-NEXT: addi a0, sp, 16
+; SPILL-O0-VSETVLI-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
+; SPILL-O0-VSETVLI-NEXT: #APP
+; SPILL-O0-VSETVLI-NEXT: #NO_APP
+; SPILL-O0-VSETVLI-NEXT: addi a0, sp, 16
+; SPILL-O0-VSETVLI-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
+; SPILL-O0-VSETVLI-NEXT: vsetvli a0, zero, e8, m8, ta, ma
+; SPILL-O0-VSETVLI-NEXT: add sp, sp, a0
+; SPILL-O0-VSETVLI-NEXT: addi sp, sp, 16
+; SPILL-O0-VSETVLI-NEXT: ret
+;
+; SPILL-O2-VSETVLI-LABEL: spill_lmul_8:
+; SPILL-O2-VSETVLI: # %bb.0: # %entry
+; SPILL-O2-VSETVLI-NEXT: addi sp, sp, -16
+; SPILL-O2-VSETVLI-NEXT: vsetvli a0, zero, e8, m8, ta, ma
+; SPILL-O2-VSETVLI-NEXT: sub sp, sp, a0
+; SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16
+; SPILL-O2-VSETVLI-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
+; SPILL-O2-VSETVLI-NEXT: #APP
+; SPILL-O2-VSETVLI-NEXT: #NO_APP
+; SPILL-O2-VSETVLI-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
+; SPILL-O2-VSETVLI-NEXT: vsetvli a0, zero, e8, m8, ta, ma
+; SPILL-O2-VSETVLI-NEXT: add sp, sp, a0
+; SPILL-O2-VSETVLI-NEXT: addi sp, sp, 16
+; SPILL-O2-VSETVLI-NEXT: ret
entry:
call void asm sideeffect "",
"~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"()
diff --git a/llvm/test/CodeGen/RISCV/rvv/rv64-spill-zvlsseg.ll b/llvm/test/CodeGen/RISCV/rvv/rv64-spill-zvlsseg.ll
index ff0f1d7748668..092496e613cf7 100644
--- a/llvm/test/CodeGen/RISCV/rvv/rv64-spill-zvlsseg.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rv64-spill-zvlsseg.ll
@@ -5,6 +5,10 @@
; RUN: | FileCheck --check-prefix=SPILL-O2 %s
; RUN: llc -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-max=128 -O2 < %s \
; RUN: | FileCheck --check-prefix=SPILL-O2-VLEN128 %s
+; RUN: llc -mtriple=riscv64 -mattr=+v,+m,+prefer-vsetvli-over-read-vlenb -O0 < %s \
+; RUN: | FileCheck --check-prefix=SPILL-O0-VSETVLI %s
+; RUN: llc -mtriple=riscv64 -mattr=+v,+m,+prefer-vsetvli-over-read-vlenb -O2 < %s \
+; RUN: | FileCheck --check-prefix=SPILL-O2-VSETVLI %s
define <vscale x 1 x i32> @spill_zvlsseg_nxv1i32(ptr %base, i64 %vl) nounwind {
; SPILL-O0-LABEL: spill_zvlsseg_nxv1i32:
@@ -74,6 +78,50 @@ define <vscale x 1 x i32> @spill_zvlsseg_nxv1i32(ptr %base, i64 %vl) nounwind {
; SPILL-O2-VLEN128-NEXT: addi sp, sp, 32
; SPILL-O2-VLEN128-NEXT: addi sp, sp, 16
; SPILL-O2-VLEN128-NEXT: ret
+;
+; SPILL-O0-VSETVLI-LABEL: spill_zvlsseg_nxv1i32:
+; SPILL-O0-VSETVLI: # %bb.0: # %entry
+; SPILL-O0-VSETVLI-NEXT: addi sp, sp, -16
+; SPILL-O0-VSETVLI-NEXT: vsetvli a2, zero, e8, m1, ta, ma
+; SPILL-O0-VSETVLI-NEXT: sub sp, sp, a2
+; SPILL-O0-VSETVLI-NEXT: # implicit-def: $v8_v9
+; SPILL-O0-VSETVLI-NEXT: vsetvli zero, a1, e32, mf2, tu, ma
+; SPILL-O0-VSETVLI-NEXT: vlseg2e32.v v8, (a0)
+; SPILL-O0-VSETVLI-NEXT: vsetvli zero, a1, e32, mf2, tu, ma
+; SPILL-O0-VSETVLI-NEXT: vmv1r.v v8, v9
+; SPILL-O0-VSETVLI-NEXT: addi a0, sp, 16
+; SPILL-O0-VSETVLI-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
+; SPILL-O0-VSETVLI-NEXT: #APP
+; SPILL-O0-VSETVLI-NEXT: #NO_APP
+; SPILL-O0-VSETVLI-NEXT: addi a0, sp, 16
+; SPILL-O0-VSETVLI-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
+; SPILL-O0-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; SPILL-O0-VSETVLI-NEXT: add sp, sp, a0
+; SPILL-O0-VSETVLI-NEXT: addi sp, sp, 16
+; SPILL-O0-VSETVLI-NEXT: ret
+;
+; SPILL-O2-VSETVLI-LABEL: spill_zvlsseg_nxv1i32:
+; SPILL-O2-VSETVLI: # %bb.0: # %entry
+; SPILL-O2-VSETVLI-NEXT: addi sp, sp, -16
+; SPILL-O2-VSETVLI-NEXT: vsetvli a2, zero, e8, m2, ta, ma
+; SPILL-O2-VSETVLI-NEXT: sub sp, sp, a2
+; SPILL-O2-VSETVLI-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
+; SPILL-O2-VSETVLI-NEXT: vlseg2e32.v v8, (a0)
+; SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16
+; SPILL-O2-VSETVLI-NEXT: csrr a1, vlenb
+; SPILL-O2-VSETVLI-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
+; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1
+; SPILL-O2-VSETVLI-NEXT: vs1r.v v9, (a0) # Unknown-size Folded Spill
+; SPILL-O2-VSETVLI-NEXT: #APP
+; SPILL-O2-VSETVLI-NEXT: #NO_APP
+; SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16
+; SPILL-O2-VSETVLI-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1
+; SPILL-O2-VSETVLI-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
+; SPILL-O2-VSETVLI-NEXT: vsetvli a0, zero, e8, m2, ta, ma
+; SPILL-O2-VSETVLI-NEXT: add sp, sp, a0
+; SPILL-O2-VSETVLI-NEXT: addi sp, sp, 16
+; SPILL-O2-VSETVLI-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) undef, ptr %base, i64 %vl, i64 5)
call void asm sideeffect "",
@@ -150,6 +198,50 @@ define <vscale x 2 x i32> @spill_zvlsseg_nxv2i32(ptr %base, i64 %vl) nounwind {
; SPILL-O2-VLEN128-NEXT: addi sp, sp, 32
; SPILL-O2-VLEN128-NEXT: addi sp, sp, 16
; SPILL-O2-VLEN128-NEXT: ret
+;
+; SPILL-O0-VSETVLI-LABEL: spill_zvlsseg_nxv2i32:
+; SPILL-O0-VSETVLI: # %bb.0: # %entry
+; SPILL-O0-VSETVLI-NEXT: addi sp, sp, -16
+; SPILL-O0-VSETVLI-NEXT: vsetvli a2, zero, e8, m1, ta, ma
+; SPILL-O0-VSETVLI-NEXT: sub sp, sp, a2
+; SPILL-O0-VSETVLI-NEXT: # implicit-def: $v8_v9
+; SPILL-O0-VSETVLI-NEXT: vsetvli zero, a1, e32, m1, tu, ma
+; SPILL-O0-VSETVLI-NEXT: vlseg2e32.v v8, (a0)
+; SPILL-O0-VSETVLI-NEXT: vsetvli zero, a1, e32, m1, tu, ma
+; SPILL-O0-VSETVLI-NEXT: vmv1r.v v8, v9
+; SPILL-O0-VSETVLI-NEXT: addi a0, sp, 16
+; SPILL-O0-VSETVLI-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
+; SPILL-O0-VSETVLI-NEXT: #APP
+; SPILL-O0-VSETVLI-NEXT: #NO_APP
+; SPILL-O0-VSETVLI-NEXT: addi a0, sp, 16
+; SPILL-O0-VSETVLI-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
+; SPILL-O0-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; SPILL-O0-VSETVLI-NEXT: add sp, sp, a0
+; SPILL-O0-VSETVLI-NEXT: addi sp, sp, 16
+; SPILL-O0-VSETVLI-NEXT: ret
+;
+; SPILL-O2-VSETVLI-LABEL: spill_zvlsseg_nxv2i32:
+; SPILL-O2-VSETVLI: # %bb.0: # %entry
+; SPILL-O2-VSETVLI-NEXT: addi sp, sp, -16
+; SPILL-O2-VSETVLI-NEXT: vsetvli a2, zero, e8, m2, ta, ma
+; SPILL-O2-VSETVLI-NEXT: sub sp, sp, a2
+; SPILL-O2-VSETVLI-NEXT: vsetvli zero, a1, e32, m1, ta, ma
+; SPILL-O2-VSETVLI-NEXT: vlseg2e32.v v8, (a0)
+; SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16
+; SPILL-O2-VSETVLI-NEXT: csrr a1, vlenb
+; SPILL-O2-VSETVLI-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
+; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1
+; SPILL-O2-VSETVLI-NEXT: vs1r.v v9, (a0) # Unknown-size Folded Spill
+; SPILL-O2-VSETVLI-NEXT: #APP
+; SPILL-O2-VSETVLI-NEXT: #NO_APP
+; SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16
+; SPILL-O2-VSETVLI-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1
+; SPILL-O2-VSETVLI-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
+; SPILL-O2-VSETVLI-NEXT: vsetvli a0, zero, e8, m2, ta, ma
+; SPILL-O2-VSETVLI-NEXT: add sp, sp, a0
+; SPILL-O2-VSETVLI-NEXT: addi sp, sp, 16
+; SPILL-O2-VSETVLI-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) undef, ptr %base, i64 %vl, i64 5)
call void asm sideeffect "",
@@ -231,6 +323,53 @@ define <vscale x 4 x i32> @spill_zvlsseg_nxv4i32(ptr %base, i64 %vl) nounwind {
; SPILL-O2-VLEN128-NEXT: addi sp, sp, 64
; SPILL-O2-VLEN128-NEXT: addi sp, sp, 16
; SPILL-O2-VLEN128-NEXT: ret
+;
+; SPILL-O0-VSETVLI-LABEL: spill_zvlsseg_nxv4i32:
+; SPILL-O0-VSETVLI: # %bb.0: # %entry
+; SPILL-O0-VSETVLI-NEXT: addi sp, sp, -16
+; SPILL-O0-VSETVLI-NEXT: vsetvli a2, zero, e8, m2, ta, ma
+; SPILL-O0-VSETVLI-NEXT: sub sp, sp, a2
+; SPILL-O0-VSETVLI-NEXT: # implicit-def: $v8m2_v10m2
+; SPILL-O0-VSETVLI-NEXT: vsetvli zero, a1, e32, m2, tu, ma
+; SPILL-O0-VSETVLI-NEXT: vlseg2e32.v v8, (a0)
+; SPILL-O0-VSETVLI-NEXT: vsetvli zero, a1, e32, m2, tu, ma
+; SPILL-O0-VSETVLI-NEXT: vmv2r.v v8, v10
+; SPILL-O0-VSETVLI-NEXT: addi a0, sp, 16
+; SPILL-O0-VSETVLI-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill
+; SPILL-O0-VSETVLI-NEXT: #APP
+; SPILL-O0-VSETVLI-NEXT: #NO_APP
+; SPILL-O0-VSETVLI-NEXT: addi a0, sp, 16
+; SPILL-O0-VSETVLI-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload
+; SPILL-O0-VSETVLI-NEXT: vsetvli a0, zero, e8, m2, ta, ma
+; SPILL-O0-VSETVLI-NEXT: add sp, sp, a0
+; SPILL-O0-VSETVLI-NEXT: addi sp, sp, 16
+; SPILL-O0-VSETVLI-NEXT: ret
+;
+; SPILL-O2-VSETVLI-LABEL: spill_zvlsseg_nxv4i32:
+; SPILL-O2-VSETVLI: # %bb.0: # %entry
+; SPILL-O2-VSETVLI-NEXT: addi sp, sp, -16
+; SPILL-O2-VSETVLI-NEXT: vsetvli a2, zero, e8, m4, ta, ma
+; SPILL-O2-VSETVLI-NEXT: sub sp, sp, a2
+; SPILL-O2-VSETVLI-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; SPILL-O2-VSETVLI-NEXT: vlseg2e32.v v8, (a0)
+; SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16
+; SPILL-O2-VSETVLI-NEXT: csrr a1, vlenb
+; SPILL-O2-VSETVLI-NEXT: slli a1, a1, 1
+; SPILL-O2-VSETVLI-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill
+; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1
+; SPILL-O2-VSETVLI-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill
+; SPILL-O2-VSETVLI-NEXT: #APP
+; SPILL-O2-VSETVLI-NEXT: #NO_APP
+; SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16
+; SPILL-O2-VSETVLI-NEXT: csrr a1, vlenb
+; SPILL-O2-VSETVLI-NEXT: slli a1, a1, 1
+; SPILL-O2-VSETVLI-NEXT: vl2r.v v6, (a0) # Unknown-size Folded Reload
+; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1
+; SPILL-O2-VSETVLI-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload
+; SPILL-O2-VSETVLI-NEXT: vsetvli a0, zero, e8, m4, ta, ma
+; SPILL-O2-VSETVLI-NEXT: add sp, sp, a0
+; SPILL-O2-VSETVLI-NEXT: addi sp, sp, 16
+; SPILL-O2-VSETVLI-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, i64 %vl, i64 5)
call void asm sideeffect "",
@@ -312,6 +451,53 @@ define <vscale x 8 x i32> @spill_zvlsseg_nxv8i32(ptr %base, i64 %vl) nounwind {
; SPILL-O2-VLEN128-NEXT: addi sp, sp, 128
; SPILL-O2-VLEN128-NEXT: addi sp, sp, 16
; SPILL-O2-VLEN128-NEXT: ret
+;
+; SPILL-O0-VSETVLI-LABEL: spill_zvlsseg_nxv8i32:
+; SPILL-O0-VSETVLI: # %bb.0: # %entry
+; SPILL-O0-VSETVLI-NEXT: addi sp, sp, -16
+; SPILL-O0-VSETVLI-NEXT: vsetvli a2, zero, e8, m4, ta, ma
+; SPILL-O0-VSETVLI-NEXT: sub sp, sp, a2
+; SPILL-O0-VSETVLI-NEXT: # implicit-def: $v8m4_v12m4
+; SPILL-O0-VSETVLI-NEXT: vsetvli zero, a1, e32, m4, tu, ma
+; SPILL-O0-VSETVLI-NEXT: vlseg2e32.v v8, (a0)
+; SPILL-O0-VSETVLI-NEXT: vsetvli zero, a1, e32, m4, tu, ma
+; SPILL-O0-VSETVLI-NEXT: vmv4r.v v8, v12
+; SPILL-O0-VSETVLI-NEXT: addi a0, sp, 16
+; SPILL-O0-VSETVLI-NEXT: vs4r.v v8, (a0) # Unknown-size Folded Spill
+; SPILL-O0-VSETVLI-NEXT: #APP
+; SPILL-O0-VSETVLI-NEXT: #NO_APP
+; SPILL-O0-VSETVLI-NEXT: addi a0, sp, 16
+; SPILL-O0-VSETVLI-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload
+; SPILL-O0-VSETVLI-NEXT: vsetvli a0, zero, e8, m4, ta, ma
+; SPILL-O0-VSETVLI-NEXT: add sp, sp, a0
+; SPILL-O0-VSETVLI-NEXT: addi sp, sp, 16
+; SPILL-O0-VSETVLI-NEXT: ret
+;
+; SPILL-O2-VSETVLI-LABEL: spill_zvlsseg_nxv8i32:
+; SPILL-O2-VSETVLI: # %bb.0: # %entry
+; SPILL-O2-VSETVLI-NEXT: addi sp, sp, -16
+; SPILL-O2-VSETVLI-NEXT: vsetvli a2, zero, e8, m8, ta, ma
+; SPILL-O2-VSETVLI-NEXT: sub sp, sp, a2
+; SPILL-O2-VSETVLI-NEXT: vsetvli zero, a1, e32, m4, ta, ma
+; SPILL-O2-VSETVLI-NEXT: vlseg2e32.v v8, (a0)
+; SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16
+; SPILL-O2-VSETVLI-NEXT: csrr a1, vlenb
+; SPILL-O2-VSETVLI-NEXT: slli a1, a1, 2
+; SPILL-O2-VSETVLI-NEXT: vs4r.v v8, (a0) # Unknown-size Folded Spill
+; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1
+; SPILL-O2-VSETVLI-NEXT: vs4r.v v12, (a0) # Unknown-size Folded Spill
+; SPILL-O2-VSETVLI-NEXT: #APP
+; SPILL-O2-VSETVLI-NEXT: #NO_APP
+; SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16
+; SPILL-O2-VSETVLI-NEXT: csrr a1, vlenb
+; SPILL-O2-VSETVLI-NEXT: slli a1, a1, 2
+; SPILL-O2-VSETVLI-NEXT: vl4r.v v4, (a0) # Unknown-size Folded Reload
+; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1
+; SPILL-O2-VSETVLI-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload
+; SPILL-O2-VSETVLI-NEXT: vsetvli a0, zero, e8, m8, ta, ma
+; SPILL-O2-VSETVLI-NEXT: add sp, sp, a0
+; SPILL-O2-VSETVLI-NEXT: addi sp, sp, 16
+; SPILL-O2-VSETVLI-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, i64 %vl, i64 5)
call void asm sideeffect "",
@@ -403,6 +589,61 @@ define <vscale x 4 x i32> @spill_zvlsseg3_nxv4i32(ptr %base, i64 %vl) nounwind {
; SPILL-O2-VLEN128-NEXT: addi sp, sp, 96
; SPILL-O2-VLEN128-NEXT: addi sp, sp, 16
; SPILL-O2-VLEN128-NEXT: ret
+;
+; SPILL-O0-VSETVLI-LABEL: spill_zvlsseg3_nxv4i32:
+; SPILL-O0-VSETVLI: # %bb.0: # %entry
+; SPILL-O0-VSETVLI-NEXT: addi sp, sp, -16
+; SPILL-O0-VSETVLI-NEXT: vsetvli a2, zero, e8, m2, ta, ma
+; SPILL-O0-VSETVLI-NEXT: sub sp, sp, a2
+; SPILL-O0-VSETVLI-NEXT: # implicit-def: $v8m2_v10m2_v12m2
+; SPILL-O0-VSETVLI-NEXT: vsetvli zero, a1, e32, m2, tu, ma
+; SPILL-O0-VSETVLI-NEXT: vlseg3e32.v v8, (a0)
+; SPILL-O0-VSETVLI-NEXT: vsetvli zero, a1, e32, m2, tu, ma
+; SPILL-O0-VSETVLI-NEXT: vmv2r.v v8, v10
+; SPILL-O0-VSETVLI-NEXT: addi a0, sp, 16
+; SPILL-O0-VSETVLI-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill
+; SPILL-O0-VSETVLI-NEXT: #APP
+; SPILL-O0-VSETVLI-NEXT: #NO_APP
+; SPILL-O0-VSETVLI-NEXT: addi a0, sp, 16
+; SPILL-O0-VSETVLI-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload
+; SPILL-O0-VSETVLI-NEXT: vsetvli a0, zero, e8, m2, ta, ma
+; SPILL-O0-VSETVLI-NEXT: add sp, sp, a0
+; SPILL-O0-VSETVLI-NEXT: addi sp, sp, 16
+; SPILL-O0-VSETVLI-NEXT: ret
+;
+; SPILL-O2-VSETVLI-LABEL: spill_zvlsseg3_nxv4i32:
+; SPILL-O2-VSETVLI: # %bb.0: # %entry
+; SPILL-O2-VSETVLI-NEXT: addi sp, sp, -16
+; SPILL-O2-VSETVLI-NEXT: vsetvli a2, zero, e8, m1, ta, ma
+; SPILL-O2-VSETVLI-NEXT: li a3, 6
+; SPILL-O2-VSETVLI-NEXT: mul a2, a2, a3
+; SPILL-O2-VSETVLI-NEXT: sub sp, sp, a2
+; SPILL-O2-VSETVLI-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; SPILL-O2-VSETVLI-NEXT: vlseg3e32.v v8, (a0)
+; SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16
+; SPILL-O2-VSETVLI-NEXT: csrr a1, vlenb
+; SPILL-O2-VSETVLI-NEXT: slli a1, a1, 1
+; SPILL-O2-VSETVLI-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill
+; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1
+; SPILL-O2-VSETVLI-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill
+; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1
+; SPILL-O2-VSETVLI-NEXT: vs2r.v v12, (a0) # Unknown-size Folded Spill
+; SPILL-O2-VSETVLI-NEXT: #APP
+; SPILL-O2-VSETVLI-NEXT: #NO_APP
+; SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16
+; SPILL-O2-VSETVLI-NEXT: csrr a1, vlenb
+; SPILL-O2-VSETVLI-NEXT: slli a1, a1, 1
+; SPILL-O2-VSETVLI-NEXT: vl2r.v v6, (a0) # Unknown-size Folded Reload
+; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1
+; SPILL-O2-VSETVLI-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload
+; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1
+; SPILL-O2-VSETVLI-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
+; SPILL-O2-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma
+; SPILL-O2-VSETVLI-NEXT: li a1, 6
+; SPILL-O2-VSETVLI-NEXT: mul a0, a0, a1
+; SPILL-O2-VSETVLI-NEXT: add sp, sp, a0
+; SPILL-O2-VSETVLI-NEXT: addi sp, sp, 16
+; SPILL-O2-VSETVLI-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, i64 %vl, i64 5)
call void asm sideeffect "",
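
A note for readers skimming the new SPILL-O2-VSETVLI check lines above: the prologue/epilogue stack adjustments for the vector spill slots are computed with a single `vsetvli` rather than a `csrr vlenb` plus shift. Since `vsetvli rd, zero, e8, m<k>, ta, ma` writes VLMAX = (VLEN / 8) * k into rd, it produces the same byte count as `vlenb << log2(k)` whenever k is a power of two. A minimal illustrative sketch of the equivalence (not taken from the patch; register choices are arbitrary):

    # Baseline: read vlenb and scale it to 8 vector registers' worth of bytes.
    csrr a2, vlenb                     # a2 = VLEN / 8
    slli a2, a2, 3                     # a2 = (VLEN / 8) * 8
    sub  sp, sp, a2

    # With +prefer-vsetvli-over-read-vlenb: one instruction computes the same.
    vsetvli a2, zero, e8, m8, ta, ma   # a2 = VLMAX(e8, m8) = (VLEN / 8) * 8
    sub     sp, sp, a2

For multiples that are not a power of two (the 6 x vlenb slot in spill_zvlsseg3_nxv4i32 above), the expansion falls back to `vsetvli` with e8/m1 followed by `li`/`mul`, as those checks show.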