[llvm] [RISCV] Optimize the spill/reload of segment registers (PR #153184)
Pengcheng Wang via llvm-commits
llvm-commits at lists.llvm.org
Thu Aug 14 04:48:03 PDT 2025
https://github.com/wangpc-pp updated https://github.com/llvm/llvm-project/pull/153184
>From 6291bcbb7658d80d154b2401a147d7ab6c82908f Mon Sep 17 00:00:00 2001
From: Pengcheng Wang <wangpengcheng.pp at bytedance.com>
Date: Tue, 12 Aug 2025 20:58:09 +0800
Subject: [PATCH 1/7] [RISCV] Optimize the spill/reload of segment registers
The simplest way would be:
1. Save `vtype` to a scalar register.
2. Insert a `vsetvli`.
3. Use segment load/store.
4. Restore `vtype` via `vsetvl`.
But `vsetvl` is usually slow, so this PR does not take that approach.
Instead, we use wider whole-register load/store instructions when the
register encoding is suitably aligned. We did the same optimization for
COPY in https://github.com/llvm/llvm-project/pull/84455.
We noticed this suboptimal spill/reload code when porting some video
codec kernels to RVV intrinsics.
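A rough, self-contained sketch of the idea (not the PR's code: `pickWidth`
is a hypothetical stand-in for the patch's `getSpillReloadInfo`, and the
base-address bookkeeping is omitted): at each step, pick the widest
whole-register store/load (LMUL 8/4/2/1) that both fits the remaining
register count and keeps the current register encoding aligned.

#include <cstdio>

// Greedy width selection: widest aligned whole-register access that still fits.
static unsigned pickWidth(unsigned NumRemaining, unsigned Encoding) {
  if (NumRemaining >= 8 && Encoding % 8 == 0) return 8; // vs8r.v / vl8re8.v
  if (NumRemaining >= 4 && Encoding % 4 == 0) return 4; // vs4r.v / vl4re8.v
  if (NumRemaining >= 2 && Encoding % 2 == 0) return 2; // vs2r.v / vl2re8.v
  return 1;                                             // vs1r.v / vl1re8.v
}

int main() {
  // Spilling a 7-register group starting at v1 (e.g. PseudoVSPILL7_M1 of
  // v1_v2_v3_v4_v5_v6_v7): decomposes into 1 + 2 + 4 whole-register stores.
  unsigned Encoding = 1, NumRegs = 7;
  for (unsigned I = 0; I < NumRegs;) {
    unsigned W = pickWidth(NumRegs - I, Encoding);
    printf("vs%ur.v of v%u..v%u\n", W, Encoding, Encoding + W - 1);
    Encoding += W;
    I += W;
  }
  return 0;
}

Starting at v1 this prints vs1r.v/vs2r.v/vs4r.v (the 1/2/4 case); reloading
the same group into v10..v16 gives 2/4/1, and a group starting at v0 gives
4/2/1, matching the sequences exercised by the updated zvlsseg-spill.mir tests.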
---
llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 14 +-
llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp | 227 ++++++++++--------
llvm/lib/Target/RISCV/RISCVRegisterInfo.h | 3 +
.../early-clobber-tied-def-subreg-liveness.ll | 20 +-
...regalloc-last-chance-recoloring-failure.ll | 24 +-
.../CodeGen/RISCV/rvv/rv32-spill-zvlsseg.ll | 132 +++-------
.../CodeGen/RISCV/rvv/rv64-spill-zvlsseg.ll | 132 +++-------
llvm/test/CodeGen/RISCV/rvv/zvlsseg-spill.mir | 33 +--
8 files changed, 216 insertions(+), 369 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 085064eee896a..7b4a1de167695 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -382,7 +382,7 @@ void RISCVInstrInfo::copyPhysRegVector(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
const DebugLoc &DL, MCRegister DstReg, MCRegister SrcReg, bool KillSrc,
const TargetRegisterClass *RegClass) const {
- const TargetRegisterInfo *TRI = STI.getRegisterInfo();
+ const RISCVRegisterInfo *TRI = STI.getRegisterInfo();
RISCVVType::VLMUL LMul = RISCVRI::getLMul(RegClass->TSFlags);
unsigned NF = RISCVRI::getNF(RegClass->TSFlags);
@@ -444,13 +444,7 @@ void RISCVInstrInfo::copyPhysRegVector(
return {RISCVVType::LMUL_1, RISCV::VRRegClass, RISCV::VMV1R_V,
RISCV::PseudoVMV_V_V_M1, RISCV::PseudoVMV_V_I_M1};
};
- auto FindRegWithEncoding = [TRI](const TargetRegisterClass &RegClass,
- uint16_t Encoding) {
- MCRegister Reg = RISCV::V0 + Encoding;
- if (RISCVRI::getLMul(RegClass.TSFlags) == RISCVVType::LMUL_1)
- return Reg;
- return TRI->getMatchingSuperReg(Reg, RISCV::sub_vrm1_0, &RegClass);
- };
+
while (I != NumRegs) {
// For non-segment copying, we only do this once as the registers are always
// aligned.
@@ -470,9 +464,9 @@ void RISCVInstrInfo::copyPhysRegVector(
// Emit actual copying.
// For reversed copying, the encoding should be decreased.
- MCRegister ActualSrcReg = FindRegWithEncoding(
+ MCRegister ActualSrcReg = TRI->findVRegWithEncoding(
RegClass, ReversedCopy ? (SrcEncoding - NumCopied + 1) : SrcEncoding);
- MCRegister ActualDstReg = FindRegWithEncoding(
+ MCRegister ActualDstReg = TRI->findVRegWithEncoding(
RegClass, ReversedCopy ? (DstEncoding - NumCopied + 1) : DstEncoding);
auto MIB = BuildMI(MBB, MBBI, DL, get(Opc), ActualDstReg);
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
index 7e58b6f342689..758bf64ff197c 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
@@ -389,6 +389,22 @@ void RISCVRegisterInfo::adjustReg(MachineBasicBlock &MBB,
.setMIFlag(Flag);
}
+static std::tuple<RISCVVType::VLMUL, const TargetRegisterClass &, unsigned>
+getSpillReloadInfo(unsigned Idx, unsigned Total, uint16_t RegEncoding,
+ bool IsSpill) {
+ if (Idx + 8 <= Total && RegEncoding % 8 == 0)
+ return {RISCVVType::LMUL_8, RISCV::VRM8RegClass,
+ IsSpill ? RISCV::VS8R_V : RISCV::VL8RE8_V};
+ if (Idx + 4 <= Total && RegEncoding % 4 == 0)
+ return {RISCVVType::LMUL_4, RISCV::VRM4RegClass,
+ IsSpill ? RISCV::VS4R_V : RISCV::VL4RE8_V};
+ if (Idx + 2 <= Total && RegEncoding % 2 == 0)
+ return {RISCVVType::LMUL_2, RISCV::VRM2RegClass,
+ IsSpill ? RISCV::VS2R_V : RISCV::VL2RE8_V};
+ return {RISCVVType::LMUL_1, RISCV::VRRegClass,
+ IsSpill ? RISCV::VS1R_V : RISCV::VL1RE8_V};
+}
+
// Split a VSPILLx_Mx pseudo into multiple whole register stores separated by
// LMUL*VLENB bytes.
void RISCVRegisterInfo::lowerVSPILL(MachineBasicBlock::iterator II) const {
@@ -403,47 +419,11 @@ void RISCVRegisterInfo::lowerVSPILL(MachineBasicBlock::iterator II) const {
auto ZvlssegInfo = RISCV::isRVVSpillForZvlsseg(II->getOpcode());
unsigned NF = ZvlssegInfo->first;
unsigned LMUL = ZvlssegInfo->second;
- assert(NF * LMUL <= 8 && "Invalid NF/LMUL combinations.");
- unsigned Opcode, SubRegIdx;
- switch (LMUL) {
- default:
- llvm_unreachable("LMUL must be 1, 2, or 4.");
- case 1:
- Opcode = RISCV::VS1R_V;
- SubRegIdx = RISCV::sub_vrm1_0;
- break;
- case 2:
- Opcode = RISCV::VS2R_V;
- SubRegIdx = RISCV::sub_vrm2_0;
- break;
- case 4:
- Opcode = RISCV::VS4R_V;
- SubRegIdx = RISCV::sub_vrm4_0;
- break;
- }
- static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
- "Unexpected subreg numbering");
- static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
- "Unexpected subreg numbering");
- static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
- "Unexpected subreg numbering");
-
- Register VL = MRI.createVirtualRegister(&RISCV::GPRRegClass);
- // Optimize for constant VLEN.
- if (auto VLEN = STI.getRealVLen()) {
- const int64_t VLENB = *VLEN / 8;
- int64_t Offset = VLENB * LMUL;
- STI.getInstrInfo()->movImm(MBB, II, DL, VL, Offset);
- } else {
- BuildMI(MBB, II, DL, TII->get(RISCV::PseudoReadVLENB), VL);
- uint32_t ShiftAmount = Log2_32(LMUL);
- if (ShiftAmount != 0)
- BuildMI(MBB, II, DL, TII->get(RISCV::SLLI), VL)
- .addReg(VL)
- .addImm(ShiftAmount);
- }
+ unsigned NumRegs = NF * LMUL;
+ assert(NumRegs <= 8 && "Invalid NF/LMUL combinations.");
Register SrcReg = II->getOperand(0).getReg();
+ uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
Register Base = II->getOperand(1).getReg();
bool IsBaseKill = II->getOperand(1).isKill();
Register NewBase = MRI.createVirtualRegister(&RISCV::GPRRegClass);
@@ -451,23 +431,53 @@ void RISCVRegisterInfo::lowerVSPILL(MachineBasicBlock::iterator II) const {
auto *OldMMO = *(II->memoperands_begin());
LocationSize OldLoc = OldMMO->getSize();
assert(OldLoc.isPrecise() && OldLoc.getValue().isKnownMultipleOf(NF));
- TypeSize NewSize = OldLoc.getValue().divideCoefficientBy(NF);
- auto *NewMMO = MF.getMachineMemOperand(OldMMO, OldMMO->getOffset(), NewSize);
- for (unsigned I = 0; I < NF; ++I) {
- // Adding implicit-use of super register to describe we are using part of
- // super register, that prevents machine verifier complaining when part of
- // subreg is undef, see comment in MachineVerifier::checkLiveness for more
- // detail.
- BuildMI(MBB, II, DL, TII->get(Opcode))
- .addReg(TRI->getSubReg(SrcReg, SubRegIdx + I))
- .addReg(Base, getKillRegState(I == NF - 1))
- .addMemOperand(NewMMO)
- .addReg(SrcReg, RegState::Implicit);
- if (I != NF - 1)
+ TypeSize NewSize = OldLoc.getValue().divideCoefficientBy(NumRegs);
+
+ Register VLENB = 0;
+ unsigned PreSavedNum = 0;
+ unsigned I = 0;
+ while (I != NumRegs) {
+ auto [LMulSaved, RegClass, Opcode] =
+ getSpillReloadInfo(I, NumRegs, SrcEncoding, true);
+ auto [NumSaved, _] = RISCVVType::decodeVLMUL(LMulSaved);
+ if (PreSavedNum) {
+ Register Step = MRI.createVirtualRegister(&RISCV::GPRRegClass);
+ if (auto VLEN = STI.getRealVLen()) {
+ const int64_t VLENB = *VLEN / 8;
+ int64_t Offset = VLENB * PreSavedNum;
+ STI.getInstrInfo()->movImm(MBB, II, DL, Step, Offset);
+ } else {
+ if (!VLENB) {
+ VLENB = MRI.createVirtualRegister(&RISCV::GPRRegClass);
+ BuildMI(MBB, II, DL, TII->get(RISCV::PseudoReadVLENB), VLENB);
+ }
+ uint32_t ShiftAmount = Log2_32(PreSavedNum);
+ if (ShiftAmount == 0)
+ Step = VLENB;
+ else
+ BuildMI(MBB, II, DL, TII->get(RISCV::SLLI), Step)
+ .addReg(VLENB)
+ .addImm(ShiftAmount);
+ }
+
BuildMI(MBB, II, DL, TII->get(RISCV::ADD), NewBase)
.addReg(Base, getKillRegState(I != 0 || IsBaseKill))
- .addReg(VL, getKillRegState(I == NF - 2));
- Base = NewBase;
+ .addReg(Step, getKillRegState(true));
+ Base = NewBase;
+ }
+
+ MCRegister ActualSrcReg = findVRegWithEncoding(RegClass, SrcEncoding);
+
+ BuildMI(MBB, II, DL, TII->get(Opcode))
+ .addReg(ActualSrcReg)
+ .addReg(Base, getKillRegState(I + NumSaved == NumRegs))
+ .addMemOperand(MF.getMachineMemOperand(OldMMO, OldMMO->getOffset(),
+ NewSize * NumSaved))
+ .addReg(SrcReg, RegState::Implicit);
+
+ PreSavedNum = NumSaved;
+ SrcEncoding += NumSaved;
+ I += NumSaved;
}
II->eraseFromParent();
}
@@ -486,65 +496,63 @@ void RISCVRegisterInfo::lowerVRELOAD(MachineBasicBlock::iterator II) const {
auto ZvlssegInfo = RISCV::isRVVSpillForZvlsseg(II->getOpcode());
unsigned NF = ZvlssegInfo->first;
unsigned LMUL = ZvlssegInfo->second;
- assert(NF * LMUL <= 8 && "Invalid NF/LMUL combinations.");
- unsigned Opcode, SubRegIdx;
- switch (LMUL) {
- default:
- llvm_unreachable("LMUL must be 1, 2, or 4.");
- case 1:
- Opcode = RISCV::VL1RE8_V;
- SubRegIdx = RISCV::sub_vrm1_0;
- break;
- case 2:
- Opcode = RISCV::VL2RE8_V;
- SubRegIdx = RISCV::sub_vrm2_0;
- break;
- case 4:
- Opcode = RISCV::VL4RE8_V;
- SubRegIdx = RISCV::sub_vrm4_0;
- break;
- }
- static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
- "Unexpected subreg numbering");
- static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
- "Unexpected subreg numbering");
- static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
- "Unexpected subreg numbering");
-
- Register VL = MRI.createVirtualRegister(&RISCV::GPRRegClass);
- // Optimize for constant VLEN.
- if (auto VLEN = STI.getRealVLen()) {
- const int64_t VLENB = *VLEN / 8;
- int64_t Offset = VLENB * LMUL;
- STI.getInstrInfo()->movImm(MBB, II, DL, VL, Offset);
- } else {
- BuildMI(MBB, II, DL, TII->get(RISCV::PseudoReadVLENB), VL);
- uint32_t ShiftAmount = Log2_32(LMUL);
- if (ShiftAmount != 0)
- BuildMI(MBB, II, DL, TII->get(RISCV::SLLI), VL)
- .addReg(VL)
- .addImm(ShiftAmount);
- }
+ unsigned NumRegs = NF * LMUL;
+ assert(NumRegs <= 8 && "Invalid NF/LMUL combinations.");
Register DestReg = II->getOperand(0).getReg();
+ uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
Register Base = II->getOperand(1).getReg();
bool IsBaseKill = II->getOperand(1).isKill();
Register NewBase = MRI.createVirtualRegister(&RISCV::GPRRegClass);
+
auto *OldMMO = *(II->memoperands_begin());
LocationSize OldLoc = OldMMO->getSize();
assert(OldLoc.isPrecise() && OldLoc.getValue().isKnownMultipleOf(NF));
- TypeSize NewSize = OldLoc.getValue().divideCoefficientBy(NF);
- auto *NewMMO = MF.getMachineMemOperand(OldMMO, OldMMO->getOffset(), NewSize);
- for (unsigned I = 0; I < NF; ++I) {
- BuildMI(MBB, II, DL, TII->get(Opcode),
- TRI->getSubReg(DestReg, SubRegIdx + I))
- .addReg(Base, getKillRegState(I == NF - 1))
- .addMemOperand(NewMMO);
- if (I != NF - 1)
+ TypeSize NewSize = OldLoc.getValue().divideCoefficientBy(NumRegs);
+
+ Register VLENB = 0;
+ unsigned PreReloadedNum = 0;
+ unsigned I = 0;
+ while (I != NumRegs) {
+ auto [LMulReloaded, RegClass, Opcode] =
+ getSpillReloadInfo(I, NumRegs, DestEncoding, false);
+ auto [NumReloaded, _] = RISCVVType::decodeVLMUL(LMulReloaded);
+ if (PreReloadedNum) {
+ Register Step = MRI.createVirtualRegister(&RISCV::GPRRegClass);
+ if (auto VLEN = STI.getRealVLen()) {
+ const int64_t VLENB = *VLEN / 8;
+ int64_t Offset = VLENB * PreReloadedNum;
+ STI.getInstrInfo()->movImm(MBB, II, DL, Step, Offset);
+ } else {
+ if (!VLENB) {
+ VLENB = MRI.createVirtualRegister(&RISCV::GPRRegClass);
+ BuildMI(MBB, II, DL, TII->get(RISCV::PseudoReadVLENB), VLENB);
+ }
+ uint32_t ShiftAmount = Log2_32(PreReloadedNum);
+ if (ShiftAmount == 0)
+ Step = VLENB;
+ else
+ BuildMI(MBB, II, DL, TII->get(RISCV::SLLI), Step)
+ .addReg(VLENB)
+ .addImm(ShiftAmount);
+ }
+
BuildMI(MBB, II, DL, TII->get(RISCV::ADD), NewBase)
.addReg(Base, getKillRegState(I != 0 || IsBaseKill))
- .addReg(VL, getKillRegState(I == NF - 2));
- Base = NewBase;
+ .addReg(Step, getKillRegState(true));
+ Base = NewBase;
+ }
+
+ MCRegister ActualDestReg = findVRegWithEncoding(RegClass, DestEncoding);
+
+ BuildMI(MBB, II, DL, TII->get(Opcode), ActualDestReg)
+ .addReg(Base, getKillRegState(I + NumReloaded == NumRegs))
+ .addMemOperand(MF.getMachineMemOperand(OldMMO, OldMMO->getOffset(),
+ NewSize * NumReloaded));
+
+ PreReloadedNum = NumReloaded;
+ DestEncoding += NumReloaded;
+ I += NumReloaded;
}
II->eraseFromParent();
}
@@ -635,9 +643,7 @@ bool RISCVRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
}
// Handle spill/fill of synthetic register classes for segment operations to
- // ensure correctness in the edge case one gets spilled. There are many
- // possible optimizations here, but given the extreme rarity of such spills,
- // we prefer simplicity of implementation for now.
+ // ensure correctness in the edge case one gets spilled.
switch (MI.getOpcode()) {
case RISCV::PseudoVSPILL2_M1:
case RISCV::PseudoVSPILL2_M2:
@@ -1052,3 +1058,12 @@ bool RISCVRegisterInfo::getRegAllocationHints(
return BaseImplRetVal;
}
+
+Register
+RISCVRegisterInfo::findVRegWithEncoding(const TargetRegisterClass &RegClass,
+ uint16_t Encoding) const {
+ MCRegister Reg = RISCV::V0 + Encoding;
+ if (RISCVRI::getLMul(RegClass.TSFlags) == RISCVVType::LMUL_1)
+ return Reg;
+ return getMatchingSuperReg(Reg, RISCV::sub_vrm1_0, &RegClass);
+}
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.h b/llvm/lib/Target/RISCV/RISCVRegisterInfo.h
index b368399e2ad14..ffb4f84afb9a3 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.h
@@ -144,6 +144,9 @@ struct RISCVRegisterInfo : public RISCVGenRegisterInfo {
const MachineFunction &MF, const VirtRegMap *VRM,
const LiveRegMatrix *Matrix) const override;
+ Register findVRegWithEncoding(const TargetRegisterClass &RegClass,
+ uint16_t Encoding) const;
+
static bool isVRRegClass(const TargetRegisterClass *RC) {
return RISCVRI::isVRegClass(RC->TSFlags) &&
RISCVRI::getNF(RC->TSFlags) == 1;
diff --git a/llvm/test/CodeGen/RISCV/early-clobber-tied-def-subreg-liveness.ll b/llvm/test/CodeGen/RISCV/early-clobber-tied-def-subreg-liveness.ll
index 0afdcdccd9246..6a7c73672bf6c 100644
--- a/llvm/test/CodeGen/RISCV/early-clobber-tied-def-subreg-liveness.ll
+++ b/llvm/test/CodeGen/RISCV/early-clobber-tied-def-subreg-liveness.ll
@@ -40,15 +40,7 @@ define void @_Z3foov() {
; CHECK-NEXT: addi a0, a0, %lo(.L__const._Z3foov.var_45)
; CHECK-NEXT: vle16.v v12, (a0)
; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 1
-; CHECK-NEXT: vs2r.v v8, (a0) # vscale x 16-byte Folded Spill
-; CHECK-NEXT: add a0, a0, a1
-; CHECK-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
-; CHECK-NEXT: add a0, a0, a1
-; CHECK-NEXT: vs2r.v v12, (a0) # vscale x 16-byte Folded Spill
-; CHECK-NEXT: add a0, a0, a1
-; CHECK-NEXT: vs2r.v v14, (a0) # vscale x 16-byte Folded Spill
+; CHECK-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
; CHECK-NEXT: lui a0, %hi(.L__const._Z3foov.var_40)
; CHECK-NEXT: addi a0, a0, %lo(.L__const._Z3foov.var_40)
; CHECK-NEXT: #APP
@@ -59,15 +51,7 @@ define void @_Z3foov() {
; CHECK-NEXT: addi a0, a0, 928
; CHECK-NEXT: vmsbc.vx v0, v8, a0
; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 1
-; CHECK-NEXT: vl2r.v v8, (a0) # vscale x 16-byte Folded Reload
-; CHECK-NEXT: add a0, a0, a1
-; CHECK-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
-; CHECK-NEXT: add a0, a0, a1
-; CHECK-NEXT: vl2r.v v12, (a0) # vscale x 16-byte Folded Reload
-; CHECK-NEXT: add a0, a0, a1
-; CHECK-NEXT: vl2r.v v14, (a0) # vscale x 16-byte Folded Reload
+; CHECK-NEXT: vl8r.v v8, (a0) # vscale x 64-byte Folded Reload
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
diff --git a/llvm/test/CodeGen/RISCV/regalloc-last-chance-recoloring-failure.ll b/llvm/test/CodeGen/RISCV/regalloc-last-chance-recoloring-failure.ll
index 878b180e34c01..f3c88923c15e2 100644
--- a/llvm/test/CodeGen/RISCV/regalloc-last-chance-recoloring-failure.ll
+++ b/llvm/test/CodeGen/RISCV/regalloc-last-chance-recoloring-failure.ll
@@ -32,11 +32,7 @@ define void @last_chance_recoloring_failure() {
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 2
-; CHECK-NEXT: vs4r.v v16, (a0) # vscale x 32-byte Folded Spill
-; CHECK-NEXT: add a0, a0, a1
-; CHECK-NEXT: vs4r.v v20, (a0) # vscale x 32-byte Folded Spill
+; CHECK-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
; CHECK-NEXT: li s0, 36
; CHECK-NEXT: vsetvli zero, s0, e16, m4, ta, ma
; CHECK-NEXT: vfwadd.vv v16, v8, v12, v0.t
@@ -47,11 +43,7 @@ define void @last_chance_recoloring_failure() {
; CHECK-NEXT: slli a0, a0, 3
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 2
-; CHECK-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload
-; CHECK-NEXT: add a0, a0, a1
-; CHECK-NEXT: vl4r.v v20, (a0) # vscale x 32-byte Folded Reload
+; CHECK-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
; CHECK-NEXT: vsetvli zero, s0, e16, m4, ta, ma
@@ -92,11 +84,7 @@ define void @last_chance_recoloring_failure() {
; SUBREGLIVENESS-NEXT: slli a0, a0, 3
; SUBREGLIVENESS-NEXT: add a0, sp, a0
; SUBREGLIVENESS-NEXT: addi a0, a0, 16
-; SUBREGLIVENESS-NEXT: csrr a1, vlenb
-; SUBREGLIVENESS-NEXT: slli a1, a1, 2
-; SUBREGLIVENESS-NEXT: vs4r.v v16, (a0) # vscale x 32-byte Folded Spill
-; SUBREGLIVENESS-NEXT: add a0, a0, a1
-; SUBREGLIVENESS-NEXT: vs4r.v v20, (a0) # vscale x 32-byte Folded Spill
+; SUBREGLIVENESS-NEXT: vs8r.v v16, (a0) # vscale x 64-byte Folded Spill
; SUBREGLIVENESS-NEXT: li s0, 36
; SUBREGLIVENESS-NEXT: vsetvli zero, s0, e16, m4, ta, ma
; SUBREGLIVENESS-NEXT: vfwadd.vv v16, v8, v12, v0.t
@@ -107,11 +95,7 @@ define void @last_chance_recoloring_failure() {
; SUBREGLIVENESS-NEXT: slli a0, a0, 3
; SUBREGLIVENESS-NEXT: add a0, sp, a0
; SUBREGLIVENESS-NEXT: addi a0, a0, 16
-; SUBREGLIVENESS-NEXT: csrr a1, vlenb
-; SUBREGLIVENESS-NEXT: slli a1, a1, 2
-; SUBREGLIVENESS-NEXT: vl4r.v v16, (a0) # vscale x 32-byte Folded Reload
-; SUBREGLIVENESS-NEXT: add a0, a0, a1
-; SUBREGLIVENESS-NEXT: vl4r.v v20, (a0) # vscale x 32-byte Folded Reload
+; SUBREGLIVENESS-NEXT: vl8r.v v16, (a0) # vscale x 64-byte Folded Reload
; SUBREGLIVENESS-NEXT: addi a0, sp, 16
; SUBREGLIVENESS-NEXT: vl8r.v v24, (a0) # vscale x 64-byte Folded Reload
; SUBREGLIVENESS-NEXT: vsetvli zero, s0, e16, m4, ta, ma
diff --git a/llvm/test/CodeGen/RISCV/rvv/rv32-spill-zvlsseg.ll b/llvm/test/CodeGen/RISCV/rvv/rv32-spill-zvlsseg.ll
index 663bb1fc15517..d69a166b04080 100644
--- a/llvm/test/CodeGen/RISCV/rvv/rv32-spill-zvlsseg.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rv32-spill-zvlsseg.ll
@@ -41,14 +41,11 @@ define <vscale x 1 x i32> @spill_zvlsseg_nxv1i32(ptr %base, i32 %vl) nounwind {
; SPILL-O2-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
; SPILL-O2-NEXT: vlseg2e32.v v8, (a0)
; SPILL-O2-NEXT: addi a0, sp, 16
-; SPILL-O2-NEXT: csrr a1, vlenb
-; SPILL-O2-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill
-; SPILL-O2-NEXT: add a0, a0, a1
-; SPILL-O2-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; SPILL-O2-NEXT: vs2r.v v8, (a0) # vscale x 16-byte Folded Spill
; SPILL-O2-NEXT: #APP
; SPILL-O2-NEXT: #NO_APP
-; SPILL-O2-NEXT: addi a0, sp, 16
; SPILL-O2-NEXT: vl1r.v v7, (a0) # vscale x 8-byte Folded Reload
+; SPILL-O2-NEXT: csrr a1, vlenb
; SPILL-O2-NEXT: add a0, a0, a1
; SPILL-O2-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
; SPILL-O2-NEXT: csrr a0, vlenb
@@ -64,15 +61,11 @@ define <vscale x 1 x i32> @spill_zvlsseg_nxv1i32(ptr %base, i32 %vl) nounwind {
; SPILL-O2-VLEN128-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
; SPILL-O2-VLEN128-NEXT: vlseg2e32.v v8, (a0)
; SPILL-O2-VLEN128-NEXT: addi a0, sp, 16
-; SPILL-O2-VLEN128-NEXT: li a1, 16
-; SPILL-O2-VLEN128-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill
-; SPILL-O2-VLEN128-NEXT: add a0, a0, a1
-; SPILL-O2-VLEN128-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; SPILL-O2-VLEN128-NEXT: vs2r.v v8, (a0) # vscale x 16-byte Folded Spill
; SPILL-O2-VLEN128-NEXT: #APP
; SPILL-O2-VLEN128-NEXT: #NO_APP
-; SPILL-O2-VLEN128-NEXT: addi a0, sp, 16
-; SPILL-O2-VLEN128-NEXT: li a1, 16
; SPILL-O2-VLEN128-NEXT: vl1r.v v7, (a0) # vscale x 8-byte Folded Reload
+; SPILL-O2-VLEN128-NEXT: li a1, 16
; SPILL-O2-VLEN128-NEXT: add a0, a0, a1
; SPILL-O2-VLEN128-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
; SPILL-O2-VLEN128-NEXT: addi sp, sp, 32
@@ -108,14 +101,11 @@ define <vscale x 1 x i32> @spill_zvlsseg_nxv1i32(ptr %base, i32 %vl) nounwind {
; SPILL-O2-VSETVLI-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
; SPILL-O2-VSETVLI-NEXT: vlseg2e32.v v8, (a0)
; SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16
-; SPILL-O2-VSETVLI-NEXT: csrr a1, vlenb
-; SPILL-O2-VSETVLI-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill
-; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1
-; SPILL-O2-VSETVLI-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; SPILL-O2-VSETVLI-NEXT: vs2r.v v8, (a0) # vscale x 16-byte Folded Spill
; SPILL-O2-VSETVLI-NEXT: #APP
; SPILL-O2-VSETVLI-NEXT: #NO_APP
-; SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16
; SPILL-O2-VSETVLI-NEXT: vl1r.v v7, (a0) # vscale x 8-byte Folded Reload
+; SPILL-O2-VSETVLI-NEXT: csrr a1, vlenb
; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1
; SPILL-O2-VSETVLI-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
; SPILL-O2-VSETVLI-NEXT: vsetvli a0, zero, e8, m2, ta, ma
@@ -161,14 +151,11 @@ define <vscale x 2 x i32> @spill_zvlsseg_nxv2i32(ptr %base, i32 %vl) nounwind {
; SPILL-O2-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; SPILL-O2-NEXT: vlseg2e32.v v8, (a0)
; SPILL-O2-NEXT: addi a0, sp, 16
-; SPILL-O2-NEXT: csrr a1, vlenb
-; SPILL-O2-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill
-; SPILL-O2-NEXT: add a0, a0, a1
-; SPILL-O2-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; SPILL-O2-NEXT: vs2r.v v8, (a0) # vscale x 16-byte Folded Spill
; SPILL-O2-NEXT: #APP
; SPILL-O2-NEXT: #NO_APP
-; SPILL-O2-NEXT: addi a0, sp, 16
; SPILL-O2-NEXT: vl1r.v v7, (a0) # vscale x 8-byte Folded Reload
+; SPILL-O2-NEXT: csrr a1, vlenb
; SPILL-O2-NEXT: add a0, a0, a1
; SPILL-O2-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
; SPILL-O2-NEXT: csrr a0, vlenb
@@ -184,15 +171,11 @@ define <vscale x 2 x i32> @spill_zvlsseg_nxv2i32(ptr %base, i32 %vl) nounwind {
; SPILL-O2-VLEN128-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; SPILL-O2-VLEN128-NEXT: vlseg2e32.v v8, (a0)
; SPILL-O2-VLEN128-NEXT: addi a0, sp, 16
-; SPILL-O2-VLEN128-NEXT: li a1, 16
-; SPILL-O2-VLEN128-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill
-; SPILL-O2-VLEN128-NEXT: add a0, a0, a1
-; SPILL-O2-VLEN128-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; SPILL-O2-VLEN128-NEXT: vs2r.v v8, (a0) # vscale x 16-byte Folded Spill
; SPILL-O2-VLEN128-NEXT: #APP
; SPILL-O2-VLEN128-NEXT: #NO_APP
-; SPILL-O2-VLEN128-NEXT: addi a0, sp, 16
-; SPILL-O2-VLEN128-NEXT: li a1, 16
; SPILL-O2-VLEN128-NEXT: vl1r.v v7, (a0) # vscale x 8-byte Folded Reload
+; SPILL-O2-VLEN128-NEXT: li a1, 16
; SPILL-O2-VLEN128-NEXT: add a0, a0, a1
; SPILL-O2-VLEN128-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
; SPILL-O2-VLEN128-NEXT: addi sp, sp, 32
@@ -228,14 +211,11 @@ define <vscale x 2 x i32> @spill_zvlsseg_nxv2i32(ptr %base, i32 %vl) nounwind {
; SPILL-O2-VSETVLI-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; SPILL-O2-VSETVLI-NEXT: vlseg2e32.v v8, (a0)
; SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16
-; SPILL-O2-VSETVLI-NEXT: csrr a1, vlenb
-; SPILL-O2-VSETVLI-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill
-; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1
-; SPILL-O2-VSETVLI-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; SPILL-O2-VSETVLI-NEXT: vs2r.v v8, (a0) # vscale x 16-byte Folded Spill
; SPILL-O2-VSETVLI-NEXT: #APP
; SPILL-O2-VSETVLI-NEXT: #NO_APP
-; SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16
; SPILL-O2-VSETVLI-NEXT: vl1r.v v7, (a0) # vscale x 8-byte Folded Reload
+; SPILL-O2-VSETVLI-NEXT: csrr a1, vlenb
; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1
; SPILL-O2-VSETVLI-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
; SPILL-O2-VSETVLI-NEXT: vsetvli a0, zero, e8, m2, ta, ma
@@ -283,17 +263,12 @@ define <vscale x 4 x i32> @spill_zvlsseg_nxv4i32(ptr %base, i32 %vl) nounwind {
; SPILL-O2-NEXT: vsetvli zero, a1, e32, m2, ta, ma
; SPILL-O2-NEXT: vlseg2e32.v v8, (a0)
; SPILL-O2-NEXT: addi a0, sp, 16
-; SPILL-O2-NEXT: csrr a1, vlenb
-; SPILL-O2-NEXT: slli a1, a1, 1
-; SPILL-O2-NEXT: vs2r.v v8, (a0) # vscale x 16-byte Folded Spill
-; SPILL-O2-NEXT: add a0, a0, a1
-; SPILL-O2-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; SPILL-O2-NEXT: vs4r.v v8, (a0) # vscale x 32-byte Folded Spill
; SPILL-O2-NEXT: #APP
; SPILL-O2-NEXT: #NO_APP
-; SPILL-O2-NEXT: addi a0, sp, 16
+; SPILL-O2-NEXT: vl2r.v v6, (a0) # vscale x 16-byte Folded Reload
; SPILL-O2-NEXT: csrr a1, vlenb
; SPILL-O2-NEXT: slli a1, a1, 1
-; SPILL-O2-NEXT: vl2r.v v6, (a0) # vscale x 16-byte Folded Reload
; SPILL-O2-NEXT: add a0, a0, a1
; SPILL-O2-NEXT: vl2r.v v8, (a0) # vscale x 16-byte Folded Reload
; SPILL-O2-NEXT: csrr a0, vlenb
@@ -309,15 +284,11 @@ define <vscale x 4 x i32> @spill_zvlsseg_nxv4i32(ptr %base, i32 %vl) nounwind {
; SPILL-O2-VLEN128-NEXT: vsetvli zero, a1, e32, m2, ta, ma
; SPILL-O2-VLEN128-NEXT: vlseg2e32.v v8, (a0)
; SPILL-O2-VLEN128-NEXT: addi a0, sp, 16
-; SPILL-O2-VLEN128-NEXT: li a1, 32
-; SPILL-O2-VLEN128-NEXT: vs2r.v v8, (a0) # vscale x 16-byte Folded Spill
-; SPILL-O2-VLEN128-NEXT: add a0, a0, a1
-; SPILL-O2-VLEN128-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; SPILL-O2-VLEN128-NEXT: vs4r.v v8, (a0) # vscale x 32-byte Folded Spill
; SPILL-O2-VLEN128-NEXT: #APP
; SPILL-O2-VLEN128-NEXT: #NO_APP
-; SPILL-O2-VLEN128-NEXT: addi a0, sp, 16
-; SPILL-O2-VLEN128-NEXT: li a1, 32
; SPILL-O2-VLEN128-NEXT: vl2r.v v6, (a0) # vscale x 16-byte Folded Reload
+; SPILL-O2-VLEN128-NEXT: li a1, 32
; SPILL-O2-VLEN128-NEXT: add a0, a0, a1
; SPILL-O2-VLEN128-NEXT: vl2r.v v8, (a0) # vscale x 16-byte Folded Reload
; SPILL-O2-VLEN128-NEXT: addi sp, sp, 64
@@ -353,17 +324,12 @@ define <vscale x 4 x i32> @spill_zvlsseg_nxv4i32(ptr %base, i32 %vl) nounwind {
; SPILL-O2-VSETVLI-NEXT: vsetvli zero, a1, e32, m2, ta, ma
; SPILL-O2-VSETVLI-NEXT: vlseg2e32.v v8, (a0)
; SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16
-; SPILL-O2-VSETVLI-NEXT: csrr a1, vlenb
-; SPILL-O2-VSETVLI-NEXT: slli a1, a1, 1
-; SPILL-O2-VSETVLI-NEXT: vs2r.v v8, (a0) # vscale x 16-byte Folded Spill
-; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1
-; SPILL-O2-VSETVLI-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; SPILL-O2-VSETVLI-NEXT: vs4r.v v8, (a0) # vscale x 32-byte Folded Spill
; SPILL-O2-VSETVLI-NEXT: #APP
; SPILL-O2-VSETVLI-NEXT: #NO_APP
-; SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16
+; SPILL-O2-VSETVLI-NEXT: vl2r.v v6, (a0) # vscale x 16-byte Folded Reload
; SPILL-O2-VSETVLI-NEXT: csrr a1, vlenb
; SPILL-O2-VSETVLI-NEXT: slli a1, a1, 1
-; SPILL-O2-VSETVLI-NEXT: vl2r.v v6, (a0) # vscale x 16-byte Folded Reload
; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1
; SPILL-O2-VSETVLI-NEXT: vl2r.v v8, (a0) # vscale x 16-byte Folded Reload
; SPILL-O2-VSETVLI-NEXT: vsetvli a0, zero, e8, m4, ta, ma
@@ -411,17 +377,12 @@ define <vscale x 8 x i32> @spill_zvlsseg_nxv8i32(ptr %base, i32 %vl) nounwind {
; SPILL-O2-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; SPILL-O2-NEXT: vlseg2e32.v v8, (a0)
; SPILL-O2-NEXT: addi a0, sp, 16
-; SPILL-O2-NEXT: csrr a1, vlenb
-; SPILL-O2-NEXT: slli a1, a1, 2
-; SPILL-O2-NEXT: vs4r.v v8, (a0) # vscale x 32-byte Folded Spill
-; SPILL-O2-NEXT: add a0, a0, a1
-; SPILL-O2-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; SPILL-O2-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
; SPILL-O2-NEXT: #APP
; SPILL-O2-NEXT: #NO_APP
-; SPILL-O2-NEXT: addi a0, sp, 16
+; SPILL-O2-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
; SPILL-O2-NEXT: csrr a1, vlenb
; SPILL-O2-NEXT: slli a1, a1, 2
-; SPILL-O2-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
; SPILL-O2-NEXT: add a0, a0, a1
; SPILL-O2-NEXT: vl4r.v v8, (a0) # vscale x 32-byte Folded Reload
; SPILL-O2-NEXT: csrr a0, vlenb
@@ -437,15 +398,11 @@ define <vscale x 8 x i32> @spill_zvlsseg_nxv8i32(ptr %base, i32 %vl) nounwind {
; SPILL-O2-VLEN128-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; SPILL-O2-VLEN128-NEXT: vlseg2e32.v v8, (a0)
; SPILL-O2-VLEN128-NEXT: addi a0, sp, 16
-; SPILL-O2-VLEN128-NEXT: li a1, 64
-; SPILL-O2-VLEN128-NEXT: vs4r.v v8, (a0) # vscale x 32-byte Folded Spill
-; SPILL-O2-VLEN128-NEXT: add a0, a0, a1
-; SPILL-O2-VLEN128-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; SPILL-O2-VLEN128-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
; SPILL-O2-VLEN128-NEXT: #APP
; SPILL-O2-VLEN128-NEXT: #NO_APP
-; SPILL-O2-VLEN128-NEXT: addi a0, sp, 16
-; SPILL-O2-VLEN128-NEXT: li a1, 64
; SPILL-O2-VLEN128-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
+; SPILL-O2-VLEN128-NEXT: li a1, 64
; SPILL-O2-VLEN128-NEXT: add a0, a0, a1
; SPILL-O2-VLEN128-NEXT: vl4r.v v8, (a0) # vscale x 32-byte Folded Reload
; SPILL-O2-VLEN128-NEXT: addi sp, sp, 128
@@ -481,17 +438,12 @@ define <vscale x 8 x i32> @spill_zvlsseg_nxv8i32(ptr %base, i32 %vl) nounwind {
; SPILL-O2-VSETVLI-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; SPILL-O2-VSETVLI-NEXT: vlseg2e32.v v8, (a0)
; SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16
-; SPILL-O2-VSETVLI-NEXT: csrr a1, vlenb
-; SPILL-O2-VSETVLI-NEXT: slli a1, a1, 2
-; SPILL-O2-VSETVLI-NEXT: vs4r.v v8, (a0) # vscale x 32-byte Folded Spill
-; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1
-; SPILL-O2-VSETVLI-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; SPILL-O2-VSETVLI-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
; SPILL-O2-VSETVLI-NEXT: #APP
; SPILL-O2-VSETVLI-NEXT: #NO_APP
-; SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16
+; SPILL-O2-VSETVLI-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
; SPILL-O2-VSETVLI-NEXT: csrr a1, vlenb
; SPILL-O2-VSETVLI-NEXT: slli a1, a1, 2
-; SPILL-O2-VSETVLI-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1
; SPILL-O2-VSETVLI-NEXT: vl4r.v v8, (a0) # vscale x 32-byte Folded Reload
; SPILL-O2-VSETVLI-NEXT: vsetvli a0, zero, e8, m8, ta, ma
@@ -540,23 +492,19 @@ define <vscale x 4 x i32> @spill_zvlsseg3_nxv4i32(ptr %base, i32 %vl) nounwind {
; SPILL-O2-NEXT: vsetvli zero, a1, e32, m2, ta, ma
; SPILL-O2-NEXT: vlseg3e32.v v8, (a0)
; SPILL-O2-NEXT: addi a0, sp, 16
+; SPILL-O2-NEXT: vs4r.v v8, (a0) # vscale x 32-byte Folded Spill
; SPILL-O2-NEXT: csrr a1, vlenb
-; SPILL-O2-NEXT: slli a1, a1, 1
-; SPILL-O2-NEXT: vs2r.v v8, (a0) # vscale x 16-byte Folded Spill
-; SPILL-O2-NEXT: add a0, a0, a1
-; SPILL-O2-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; SPILL-O2-NEXT: slli a1, a1, 2
; SPILL-O2-NEXT: add a0, a0, a1
; SPILL-O2-NEXT: vs2r.v v12, (a0) # vscale x 16-byte Folded Spill
; SPILL-O2-NEXT: #APP
; SPILL-O2-NEXT: #NO_APP
; SPILL-O2-NEXT: addi a0, sp, 16
+; SPILL-O2-NEXT: vl2r.v v6, (a0) # vscale x 16-byte Folded Reload
; SPILL-O2-NEXT: csrr a1, vlenb
; SPILL-O2-NEXT: slli a1, a1, 1
-; SPILL-O2-NEXT: vl2r.v v6, (a0) # vscale x 16-byte Folded Reload
; SPILL-O2-NEXT: add a0, a0, a1
-; SPILL-O2-NEXT: vl2r.v v8, (a0) # vscale x 16-byte Folded Reload
-; SPILL-O2-NEXT: add a0, a0, a1
-; SPILL-O2-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; SPILL-O2-NEXT: vl4r.v v8, (a0) # vscale x 32-byte Folded Reload
; SPILL-O2-NEXT: csrr a0, vlenb
; SPILL-O2-NEXT: li a1, 6
; SPILL-O2-NEXT: mul a0, a0, a1
@@ -571,21 +519,17 @@ define <vscale x 4 x i32> @spill_zvlsseg3_nxv4i32(ptr %base, i32 %vl) nounwind {
; SPILL-O2-VLEN128-NEXT: vsetvli zero, a1, e32, m2, ta, ma
; SPILL-O2-VLEN128-NEXT: vlseg3e32.v v8, (a0)
; SPILL-O2-VLEN128-NEXT: addi a0, sp, 16
-; SPILL-O2-VLEN128-NEXT: li a1, 32
-; SPILL-O2-VLEN128-NEXT: vs2r.v v8, (a0) # vscale x 16-byte Folded Spill
-; SPILL-O2-VLEN128-NEXT: add a0, a0, a1
-; SPILL-O2-VLEN128-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; SPILL-O2-VLEN128-NEXT: vs4r.v v8, (a0) # vscale x 32-byte Folded Spill
+; SPILL-O2-VLEN128-NEXT: li a1, 64
; SPILL-O2-VLEN128-NEXT: add a0, a0, a1
; SPILL-O2-VLEN128-NEXT: vs2r.v v12, (a0) # vscale x 16-byte Folded Spill
; SPILL-O2-VLEN128-NEXT: #APP
; SPILL-O2-VLEN128-NEXT: #NO_APP
; SPILL-O2-VLEN128-NEXT: addi a0, sp, 16
-; SPILL-O2-VLEN128-NEXT: li a1, 32
; SPILL-O2-VLEN128-NEXT: vl2r.v v6, (a0) # vscale x 16-byte Folded Reload
+; SPILL-O2-VLEN128-NEXT: li a1, 32
; SPILL-O2-VLEN128-NEXT: add a0, a0, a1
-; SPILL-O2-VLEN128-NEXT: vl2r.v v8, (a0) # vscale x 16-byte Folded Reload
-; SPILL-O2-VLEN128-NEXT: add a0, a0, a1
-; SPILL-O2-VLEN128-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; SPILL-O2-VLEN128-NEXT: vl4r.v v8, (a0) # vscale x 32-byte Folded Reload
; SPILL-O2-VLEN128-NEXT: addi sp, sp, 96
; SPILL-O2-VLEN128-NEXT: addi sp, sp, 16
; SPILL-O2-VLEN128-NEXT: ret
@@ -621,23 +565,19 @@ define <vscale x 4 x i32> @spill_zvlsseg3_nxv4i32(ptr %base, i32 %vl) nounwind {
; SPILL-O2-VSETVLI-NEXT: vsetvli zero, a1, e32, m2, ta, ma
; SPILL-O2-VSETVLI-NEXT: vlseg3e32.v v8, (a0)
; SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16
+; SPILL-O2-VSETVLI-NEXT: vs4r.v v8, (a0) # vscale x 32-byte Folded Spill
; SPILL-O2-VSETVLI-NEXT: csrr a1, vlenb
-; SPILL-O2-VSETVLI-NEXT: slli a1, a1, 1
-; SPILL-O2-VSETVLI-NEXT: vs2r.v v8, (a0) # vscale x 16-byte Folded Spill
-; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1
-; SPILL-O2-VSETVLI-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; SPILL-O2-VSETVLI-NEXT: slli a1, a1, 2
; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1
; SPILL-O2-VSETVLI-NEXT: vs2r.v v12, (a0) # vscale x 16-byte Folded Spill
; SPILL-O2-VSETVLI-NEXT: #APP
; SPILL-O2-VSETVLI-NEXT: #NO_APP
; SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16
+; SPILL-O2-VSETVLI-NEXT: vl2r.v v6, (a0) # vscale x 16-byte Folded Reload
; SPILL-O2-VSETVLI-NEXT: csrr a1, vlenb
; SPILL-O2-VSETVLI-NEXT: slli a1, a1, 1
-; SPILL-O2-VSETVLI-NEXT: vl2r.v v6, (a0) # vscale x 16-byte Folded Reload
; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1
-; SPILL-O2-VSETVLI-NEXT: vl2r.v v8, (a0) # vscale x 16-byte Folded Reload
-; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1
-; SPILL-O2-VSETVLI-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; SPILL-O2-VSETVLI-NEXT: vl4r.v v8, (a0) # vscale x 32-byte Folded Reload
; SPILL-O2-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma
; SPILL-O2-VSETVLI-NEXT: li a1, 6
; SPILL-O2-VSETVLI-NEXT: mul a0, a0, a1
diff --git a/llvm/test/CodeGen/RISCV/rvv/rv64-spill-zvlsseg.ll b/llvm/test/CodeGen/RISCV/rvv/rv64-spill-zvlsseg.ll
index dc0e8fd987c6d..610443845389a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/rv64-spill-zvlsseg.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rv64-spill-zvlsseg.ll
@@ -41,14 +41,11 @@ define <vscale x 1 x i32> @spill_zvlsseg_nxv1i32(ptr %base, i64 %vl) nounwind {
; SPILL-O2-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
; SPILL-O2-NEXT: vlseg2e32.v v8, (a0)
; SPILL-O2-NEXT: addi a0, sp, 16
-; SPILL-O2-NEXT: csrr a1, vlenb
-; SPILL-O2-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill
-; SPILL-O2-NEXT: add a0, a0, a1
-; SPILL-O2-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; SPILL-O2-NEXT: vs2r.v v8, (a0) # vscale x 16-byte Folded Spill
; SPILL-O2-NEXT: #APP
; SPILL-O2-NEXT: #NO_APP
-; SPILL-O2-NEXT: addi a0, sp, 16
; SPILL-O2-NEXT: vl1r.v v7, (a0) # vscale x 8-byte Folded Reload
+; SPILL-O2-NEXT: csrr a1, vlenb
; SPILL-O2-NEXT: add a0, a0, a1
; SPILL-O2-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
; SPILL-O2-NEXT: csrr a0, vlenb
@@ -64,15 +61,11 @@ define <vscale x 1 x i32> @spill_zvlsseg_nxv1i32(ptr %base, i64 %vl) nounwind {
; SPILL-O2-VLEN128-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
; SPILL-O2-VLEN128-NEXT: vlseg2e32.v v8, (a0)
; SPILL-O2-VLEN128-NEXT: addi a0, sp, 16
-; SPILL-O2-VLEN128-NEXT: li a1, 16
-; SPILL-O2-VLEN128-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill
-; SPILL-O2-VLEN128-NEXT: add a0, a0, a1
-; SPILL-O2-VLEN128-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; SPILL-O2-VLEN128-NEXT: vs2r.v v8, (a0) # vscale x 16-byte Folded Spill
; SPILL-O2-VLEN128-NEXT: #APP
; SPILL-O2-VLEN128-NEXT: #NO_APP
-; SPILL-O2-VLEN128-NEXT: addi a0, sp, 16
-; SPILL-O2-VLEN128-NEXT: li a1, 16
; SPILL-O2-VLEN128-NEXT: vl1r.v v7, (a0) # vscale x 8-byte Folded Reload
+; SPILL-O2-VLEN128-NEXT: li a1, 16
; SPILL-O2-VLEN128-NEXT: add a0, a0, a1
; SPILL-O2-VLEN128-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
; SPILL-O2-VLEN128-NEXT: addi sp, sp, 32
@@ -108,14 +101,11 @@ define <vscale x 1 x i32> @spill_zvlsseg_nxv1i32(ptr %base, i64 %vl) nounwind {
; SPILL-O2-VSETVLI-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
; SPILL-O2-VSETVLI-NEXT: vlseg2e32.v v8, (a0)
; SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16
-; SPILL-O2-VSETVLI-NEXT: csrr a1, vlenb
-; SPILL-O2-VSETVLI-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill
-; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1
-; SPILL-O2-VSETVLI-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; SPILL-O2-VSETVLI-NEXT: vs2r.v v8, (a0) # vscale x 16-byte Folded Spill
; SPILL-O2-VSETVLI-NEXT: #APP
; SPILL-O2-VSETVLI-NEXT: #NO_APP
-; SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16
; SPILL-O2-VSETVLI-NEXT: vl1r.v v7, (a0) # vscale x 8-byte Folded Reload
+; SPILL-O2-VSETVLI-NEXT: csrr a1, vlenb
; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1
; SPILL-O2-VSETVLI-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
; SPILL-O2-VSETVLI-NEXT: vsetvli a0, zero, e8, m2, ta, ma
@@ -161,14 +151,11 @@ define <vscale x 2 x i32> @spill_zvlsseg_nxv2i32(ptr %base, i64 %vl) nounwind {
; SPILL-O2-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; SPILL-O2-NEXT: vlseg2e32.v v8, (a0)
; SPILL-O2-NEXT: addi a0, sp, 16
-; SPILL-O2-NEXT: csrr a1, vlenb
-; SPILL-O2-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill
-; SPILL-O2-NEXT: add a0, a0, a1
-; SPILL-O2-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; SPILL-O2-NEXT: vs2r.v v8, (a0) # vscale x 16-byte Folded Spill
; SPILL-O2-NEXT: #APP
; SPILL-O2-NEXT: #NO_APP
-; SPILL-O2-NEXT: addi a0, sp, 16
; SPILL-O2-NEXT: vl1r.v v7, (a0) # vscale x 8-byte Folded Reload
+; SPILL-O2-NEXT: csrr a1, vlenb
; SPILL-O2-NEXT: add a0, a0, a1
; SPILL-O2-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
; SPILL-O2-NEXT: csrr a0, vlenb
@@ -184,15 +171,11 @@ define <vscale x 2 x i32> @spill_zvlsseg_nxv2i32(ptr %base, i64 %vl) nounwind {
; SPILL-O2-VLEN128-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; SPILL-O2-VLEN128-NEXT: vlseg2e32.v v8, (a0)
; SPILL-O2-VLEN128-NEXT: addi a0, sp, 16
-; SPILL-O2-VLEN128-NEXT: li a1, 16
-; SPILL-O2-VLEN128-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill
-; SPILL-O2-VLEN128-NEXT: add a0, a0, a1
-; SPILL-O2-VLEN128-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; SPILL-O2-VLEN128-NEXT: vs2r.v v8, (a0) # vscale x 16-byte Folded Spill
; SPILL-O2-VLEN128-NEXT: #APP
; SPILL-O2-VLEN128-NEXT: #NO_APP
-; SPILL-O2-VLEN128-NEXT: addi a0, sp, 16
-; SPILL-O2-VLEN128-NEXT: li a1, 16
; SPILL-O2-VLEN128-NEXT: vl1r.v v7, (a0) # vscale x 8-byte Folded Reload
+; SPILL-O2-VLEN128-NEXT: li a1, 16
; SPILL-O2-VLEN128-NEXT: add a0, a0, a1
; SPILL-O2-VLEN128-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
; SPILL-O2-VLEN128-NEXT: addi sp, sp, 32
@@ -228,14 +211,11 @@ define <vscale x 2 x i32> @spill_zvlsseg_nxv2i32(ptr %base, i64 %vl) nounwind {
; SPILL-O2-VSETVLI-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; SPILL-O2-VSETVLI-NEXT: vlseg2e32.v v8, (a0)
; SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16
-; SPILL-O2-VSETVLI-NEXT: csrr a1, vlenb
-; SPILL-O2-VSETVLI-NEXT: vs1r.v v8, (a0) # vscale x 8-byte Folded Spill
-; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1
-; SPILL-O2-VSETVLI-NEXT: vs1r.v v9, (a0) # vscale x 8-byte Folded Spill
+; SPILL-O2-VSETVLI-NEXT: vs2r.v v8, (a0) # vscale x 16-byte Folded Spill
; SPILL-O2-VSETVLI-NEXT: #APP
; SPILL-O2-VSETVLI-NEXT: #NO_APP
-; SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16
; SPILL-O2-VSETVLI-NEXT: vl1r.v v7, (a0) # vscale x 8-byte Folded Reload
+; SPILL-O2-VSETVLI-NEXT: csrr a1, vlenb
; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1
; SPILL-O2-VSETVLI-NEXT: vl1r.v v8, (a0) # vscale x 8-byte Folded Reload
; SPILL-O2-VSETVLI-NEXT: vsetvli a0, zero, e8, m2, ta, ma
@@ -283,17 +263,12 @@ define <vscale x 4 x i32> @spill_zvlsseg_nxv4i32(ptr %base, i64 %vl) nounwind {
; SPILL-O2-NEXT: vsetvli zero, a1, e32, m2, ta, ma
; SPILL-O2-NEXT: vlseg2e32.v v8, (a0)
; SPILL-O2-NEXT: addi a0, sp, 16
-; SPILL-O2-NEXT: csrr a1, vlenb
-; SPILL-O2-NEXT: slli a1, a1, 1
-; SPILL-O2-NEXT: vs2r.v v8, (a0) # vscale x 16-byte Folded Spill
-; SPILL-O2-NEXT: add a0, a0, a1
-; SPILL-O2-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; SPILL-O2-NEXT: vs4r.v v8, (a0) # vscale x 32-byte Folded Spill
; SPILL-O2-NEXT: #APP
; SPILL-O2-NEXT: #NO_APP
-; SPILL-O2-NEXT: addi a0, sp, 16
+; SPILL-O2-NEXT: vl2r.v v6, (a0) # vscale x 16-byte Folded Reload
; SPILL-O2-NEXT: csrr a1, vlenb
; SPILL-O2-NEXT: slli a1, a1, 1
-; SPILL-O2-NEXT: vl2r.v v6, (a0) # vscale x 16-byte Folded Reload
; SPILL-O2-NEXT: add a0, a0, a1
; SPILL-O2-NEXT: vl2r.v v8, (a0) # vscale x 16-byte Folded Reload
; SPILL-O2-NEXT: csrr a0, vlenb
@@ -309,15 +284,11 @@ define <vscale x 4 x i32> @spill_zvlsseg_nxv4i32(ptr %base, i64 %vl) nounwind {
; SPILL-O2-VLEN128-NEXT: vsetvli zero, a1, e32, m2, ta, ma
; SPILL-O2-VLEN128-NEXT: vlseg2e32.v v8, (a0)
; SPILL-O2-VLEN128-NEXT: addi a0, sp, 16
-; SPILL-O2-VLEN128-NEXT: li a1, 32
-; SPILL-O2-VLEN128-NEXT: vs2r.v v8, (a0) # vscale x 16-byte Folded Spill
-; SPILL-O2-VLEN128-NEXT: add a0, a0, a1
-; SPILL-O2-VLEN128-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; SPILL-O2-VLEN128-NEXT: vs4r.v v8, (a0) # vscale x 32-byte Folded Spill
; SPILL-O2-VLEN128-NEXT: #APP
; SPILL-O2-VLEN128-NEXT: #NO_APP
-; SPILL-O2-VLEN128-NEXT: addi a0, sp, 16
-; SPILL-O2-VLEN128-NEXT: li a1, 32
; SPILL-O2-VLEN128-NEXT: vl2r.v v6, (a0) # vscale x 16-byte Folded Reload
+; SPILL-O2-VLEN128-NEXT: li a1, 32
; SPILL-O2-VLEN128-NEXT: add a0, a0, a1
; SPILL-O2-VLEN128-NEXT: vl2r.v v8, (a0) # vscale x 16-byte Folded Reload
; SPILL-O2-VLEN128-NEXT: addi sp, sp, 64
@@ -353,17 +324,12 @@ define <vscale x 4 x i32> @spill_zvlsseg_nxv4i32(ptr %base, i64 %vl) nounwind {
; SPILL-O2-VSETVLI-NEXT: vsetvli zero, a1, e32, m2, ta, ma
; SPILL-O2-VSETVLI-NEXT: vlseg2e32.v v8, (a0)
; SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16
-; SPILL-O2-VSETVLI-NEXT: csrr a1, vlenb
-; SPILL-O2-VSETVLI-NEXT: slli a1, a1, 1
-; SPILL-O2-VSETVLI-NEXT: vs2r.v v8, (a0) # vscale x 16-byte Folded Spill
-; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1
-; SPILL-O2-VSETVLI-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; SPILL-O2-VSETVLI-NEXT: vs4r.v v8, (a0) # vscale x 32-byte Folded Spill
; SPILL-O2-VSETVLI-NEXT: #APP
; SPILL-O2-VSETVLI-NEXT: #NO_APP
-; SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16
+; SPILL-O2-VSETVLI-NEXT: vl2r.v v6, (a0) # vscale x 16-byte Folded Reload
; SPILL-O2-VSETVLI-NEXT: csrr a1, vlenb
; SPILL-O2-VSETVLI-NEXT: slli a1, a1, 1
-; SPILL-O2-VSETVLI-NEXT: vl2r.v v6, (a0) # vscale x 16-byte Folded Reload
; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1
; SPILL-O2-VSETVLI-NEXT: vl2r.v v8, (a0) # vscale x 16-byte Folded Reload
; SPILL-O2-VSETVLI-NEXT: vsetvli a0, zero, e8, m4, ta, ma
@@ -411,17 +377,12 @@ define <vscale x 8 x i32> @spill_zvlsseg_nxv8i32(ptr %base, i64 %vl) nounwind {
; SPILL-O2-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; SPILL-O2-NEXT: vlseg2e32.v v8, (a0)
; SPILL-O2-NEXT: addi a0, sp, 16
-; SPILL-O2-NEXT: csrr a1, vlenb
-; SPILL-O2-NEXT: slli a1, a1, 2
-; SPILL-O2-NEXT: vs4r.v v8, (a0) # vscale x 32-byte Folded Spill
-; SPILL-O2-NEXT: add a0, a0, a1
-; SPILL-O2-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; SPILL-O2-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
; SPILL-O2-NEXT: #APP
; SPILL-O2-NEXT: #NO_APP
-; SPILL-O2-NEXT: addi a0, sp, 16
+; SPILL-O2-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
; SPILL-O2-NEXT: csrr a1, vlenb
; SPILL-O2-NEXT: slli a1, a1, 2
-; SPILL-O2-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
; SPILL-O2-NEXT: add a0, a0, a1
; SPILL-O2-NEXT: vl4r.v v8, (a0) # vscale x 32-byte Folded Reload
; SPILL-O2-NEXT: csrr a0, vlenb
@@ -437,15 +398,11 @@ define <vscale x 8 x i32> @spill_zvlsseg_nxv8i32(ptr %base, i64 %vl) nounwind {
; SPILL-O2-VLEN128-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; SPILL-O2-VLEN128-NEXT: vlseg2e32.v v8, (a0)
; SPILL-O2-VLEN128-NEXT: addi a0, sp, 16
-; SPILL-O2-VLEN128-NEXT: li a1, 64
-; SPILL-O2-VLEN128-NEXT: vs4r.v v8, (a0) # vscale x 32-byte Folded Spill
-; SPILL-O2-VLEN128-NEXT: add a0, a0, a1
-; SPILL-O2-VLEN128-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; SPILL-O2-VLEN128-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
; SPILL-O2-VLEN128-NEXT: #APP
; SPILL-O2-VLEN128-NEXT: #NO_APP
-; SPILL-O2-VLEN128-NEXT: addi a0, sp, 16
-; SPILL-O2-VLEN128-NEXT: li a1, 64
; SPILL-O2-VLEN128-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
+; SPILL-O2-VLEN128-NEXT: li a1, 64
; SPILL-O2-VLEN128-NEXT: add a0, a0, a1
; SPILL-O2-VLEN128-NEXT: vl4r.v v8, (a0) # vscale x 32-byte Folded Reload
; SPILL-O2-VLEN128-NEXT: addi sp, sp, 128
@@ -481,17 +438,12 @@ define <vscale x 8 x i32> @spill_zvlsseg_nxv8i32(ptr %base, i64 %vl) nounwind {
; SPILL-O2-VSETVLI-NEXT: vsetvli zero, a1, e32, m4, ta, ma
; SPILL-O2-VSETVLI-NEXT: vlseg2e32.v v8, (a0)
; SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16
-; SPILL-O2-VSETVLI-NEXT: csrr a1, vlenb
-; SPILL-O2-VSETVLI-NEXT: slli a1, a1, 2
-; SPILL-O2-VSETVLI-NEXT: vs4r.v v8, (a0) # vscale x 32-byte Folded Spill
-; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1
-; SPILL-O2-VSETVLI-NEXT: vs4r.v v12, (a0) # vscale x 32-byte Folded Spill
+; SPILL-O2-VSETVLI-NEXT: vs8r.v v8, (a0) # vscale x 64-byte Folded Spill
; SPILL-O2-VSETVLI-NEXT: #APP
; SPILL-O2-VSETVLI-NEXT: #NO_APP
-; SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16
+; SPILL-O2-VSETVLI-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
; SPILL-O2-VSETVLI-NEXT: csrr a1, vlenb
; SPILL-O2-VSETVLI-NEXT: slli a1, a1, 2
-; SPILL-O2-VSETVLI-NEXT: vl4r.v v4, (a0) # vscale x 32-byte Folded Reload
; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1
; SPILL-O2-VSETVLI-NEXT: vl4r.v v8, (a0) # vscale x 32-byte Folded Reload
; SPILL-O2-VSETVLI-NEXT: vsetvli a0, zero, e8, m8, ta, ma
@@ -540,23 +492,19 @@ define <vscale x 4 x i32> @spill_zvlsseg3_nxv4i32(ptr %base, i64 %vl) nounwind {
; SPILL-O2-NEXT: vsetvli zero, a1, e32, m2, ta, ma
; SPILL-O2-NEXT: vlseg3e32.v v8, (a0)
; SPILL-O2-NEXT: addi a0, sp, 16
+; SPILL-O2-NEXT: vs4r.v v8, (a0) # vscale x 32-byte Folded Spill
; SPILL-O2-NEXT: csrr a1, vlenb
-; SPILL-O2-NEXT: slli a1, a1, 1
-; SPILL-O2-NEXT: vs2r.v v8, (a0) # vscale x 16-byte Folded Spill
-; SPILL-O2-NEXT: add a0, a0, a1
-; SPILL-O2-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; SPILL-O2-NEXT: slli a1, a1, 2
; SPILL-O2-NEXT: add a0, a0, a1
; SPILL-O2-NEXT: vs2r.v v12, (a0) # vscale x 16-byte Folded Spill
; SPILL-O2-NEXT: #APP
; SPILL-O2-NEXT: #NO_APP
; SPILL-O2-NEXT: addi a0, sp, 16
+; SPILL-O2-NEXT: vl2r.v v6, (a0) # vscale x 16-byte Folded Reload
; SPILL-O2-NEXT: csrr a1, vlenb
; SPILL-O2-NEXT: slli a1, a1, 1
-; SPILL-O2-NEXT: vl2r.v v6, (a0) # vscale x 16-byte Folded Reload
; SPILL-O2-NEXT: add a0, a0, a1
-; SPILL-O2-NEXT: vl2r.v v8, (a0) # vscale x 16-byte Folded Reload
-; SPILL-O2-NEXT: add a0, a0, a1
-; SPILL-O2-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; SPILL-O2-NEXT: vl4r.v v8, (a0) # vscale x 32-byte Folded Reload
; SPILL-O2-NEXT: csrr a0, vlenb
; SPILL-O2-NEXT: li a1, 6
; SPILL-O2-NEXT: mul a0, a0, a1
@@ -571,21 +519,17 @@ define <vscale x 4 x i32> @spill_zvlsseg3_nxv4i32(ptr %base, i64 %vl) nounwind {
; SPILL-O2-VLEN128-NEXT: vsetvli zero, a1, e32, m2, ta, ma
; SPILL-O2-VLEN128-NEXT: vlseg3e32.v v8, (a0)
; SPILL-O2-VLEN128-NEXT: addi a0, sp, 16
-; SPILL-O2-VLEN128-NEXT: li a1, 32
-; SPILL-O2-VLEN128-NEXT: vs2r.v v8, (a0) # vscale x 16-byte Folded Spill
-; SPILL-O2-VLEN128-NEXT: add a0, a0, a1
-; SPILL-O2-VLEN128-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; SPILL-O2-VLEN128-NEXT: vs4r.v v8, (a0) # vscale x 32-byte Folded Spill
+; SPILL-O2-VLEN128-NEXT: li a1, 64
; SPILL-O2-VLEN128-NEXT: add a0, a0, a1
; SPILL-O2-VLEN128-NEXT: vs2r.v v12, (a0) # vscale x 16-byte Folded Spill
; SPILL-O2-VLEN128-NEXT: #APP
; SPILL-O2-VLEN128-NEXT: #NO_APP
; SPILL-O2-VLEN128-NEXT: addi a0, sp, 16
-; SPILL-O2-VLEN128-NEXT: li a1, 32
; SPILL-O2-VLEN128-NEXT: vl2r.v v6, (a0) # vscale x 16-byte Folded Reload
+; SPILL-O2-VLEN128-NEXT: li a1, 32
; SPILL-O2-VLEN128-NEXT: add a0, a0, a1
-; SPILL-O2-VLEN128-NEXT: vl2r.v v8, (a0) # vscale x 16-byte Folded Reload
-; SPILL-O2-VLEN128-NEXT: add a0, a0, a1
-; SPILL-O2-VLEN128-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; SPILL-O2-VLEN128-NEXT: vl4r.v v8, (a0) # vscale x 32-byte Folded Reload
; SPILL-O2-VLEN128-NEXT: addi sp, sp, 96
; SPILL-O2-VLEN128-NEXT: addi sp, sp, 16
; SPILL-O2-VLEN128-NEXT: ret
@@ -621,23 +565,19 @@ define <vscale x 4 x i32> @spill_zvlsseg3_nxv4i32(ptr %base, i64 %vl) nounwind {
; SPILL-O2-VSETVLI-NEXT: vsetvli zero, a1, e32, m2, ta, ma
; SPILL-O2-VSETVLI-NEXT: vlseg3e32.v v8, (a0)
; SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16
+; SPILL-O2-VSETVLI-NEXT: vs4r.v v8, (a0) # vscale x 32-byte Folded Spill
; SPILL-O2-VSETVLI-NEXT: csrr a1, vlenb
-; SPILL-O2-VSETVLI-NEXT: slli a1, a1, 1
-; SPILL-O2-VSETVLI-NEXT: vs2r.v v8, (a0) # vscale x 16-byte Folded Spill
-; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1
-; SPILL-O2-VSETVLI-NEXT: vs2r.v v10, (a0) # vscale x 16-byte Folded Spill
+; SPILL-O2-VSETVLI-NEXT: slli a1, a1, 2
; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1
; SPILL-O2-VSETVLI-NEXT: vs2r.v v12, (a0) # vscale x 16-byte Folded Spill
; SPILL-O2-VSETVLI-NEXT: #APP
; SPILL-O2-VSETVLI-NEXT: #NO_APP
; SPILL-O2-VSETVLI-NEXT: addi a0, sp, 16
+; SPILL-O2-VSETVLI-NEXT: vl2r.v v6, (a0) # vscale x 16-byte Folded Reload
; SPILL-O2-VSETVLI-NEXT: csrr a1, vlenb
; SPILL-O2-VSETVLI-NEXT: slli a1, a1, 1
-; SPILL-O2-VSETVLI-NEXT: vl2r.v v6, (a0) # vscale x 16-byte Folded Reload
; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1
-; SPILL-O2-VSETVLI-NEXT: vl2r.v v8, (a0) # vscale x 16-byte Folded Reload
-; SPILL-O2-VSETVLI-NEXT: add a0, a0, a1
-; SPILL-O2-VSETVLI-NEXT: vl2r.v v10, (a0) # vscale x 16-byte Folded Reload
+; SPILL-O2-VSETVLI-NEXT: vl4r.v v8, (a0) # vscale x 32-byte Folded Reload
; SPILL-O2-VSETVLI-NEXT: vsetvli a0, zero, e8, m1, ta, ma
; SPILL-O2-VSETVLI-NEXT: li a1, 6
; SPILL-O2-VSETVLI-NEXT: mul a0, a0, a1
diff --git a/llvm/test/CodeGen/RISCV/rvv/zvlsseg-spill.mir b/llvm/test/CodeGen/RISCV/rvv/zvlsseg-spill.mir
index 055d9ed630718..6b1e5e08c4b38 100644
--- a/llvm/test/CodeGen/RISCV/rvv/zvlsseg-spill.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/zvlsseg-spill.mir
@@ -30,35 +30,22 @@ body: |
; CHECK-NEXT: dead $x0 = PseudoVSETVLI killed renamable $x11, 216 /* e64, m1, ta, ma */, implicit-def $vl, implicit-def $vtype
; CHECK-NEXT: $v0_v1_v2_v3_v4_v5_v6 = PseudoVLSEG7E64_V_M1 undef $v0_v1_v2_v3_v4_v5_v6, renamable $x10, $noreg, 6 /* e64 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
; CHECK-NEXT: $x11 = ADDI $x2, 16
+ ; CHECK-NEXT: VS4R_V $v0m4, $x11, implicit $v0_v1_v2_v3_v4_v5_v6 :: (store (<vscale x 1 x s256>) into %stack.0, align 8)
; CHECK-NEXT: $x12 = PseudoReadVLENB
- ; CHECK-NEXT: VS1R_V $v0, $x11, implicit $v0_v1_v2_v3_v4_v5_v6 :: (store (<vscale x 1 x s64>) into %stack.0)
- ; CHECK-NEXT: $x11 = ADD killed $x11, $x12
- ; CHECK-NEXT: VS1R_V $v1, $x11, implicit $v0_v1_v2_v3_v4_v5_v6 :: (store (<vscale x 1 x s64>) into %stack.0)
- ; CHECK-NEXT: $x11 = ADD killed $x11, $x12
- ; CHECK-NEXT: VS1R_V $v2, $x11, implicit $v0_v1_v2_v3_v4_v5_v6 :: (store (<vscale x 1 x s64>) into %stack.0)
- ; CHECK-NEXT: $x11 = ADD killed $x11, $x12
- ; CHECK-NEXT: VS1R_V $v3, $x11, implicit $v0_v1_v2_v3_v4_v5_v6 :: (store (<vscale x 1 x s64>) into %stack.0)
- ; CHECK-NEXT: $x11 = ADD killed $x11, $x12
- ; CHECK-NEXT: VS1R_V $v4, $x11, implicit $v0_v1_v2_v3_v4_v5_v6 :: (store (<vscale x 1 x s64>) into %stack.0)
- ; CHECK-NEXT: $x11 = ADD killed $x11, $x12
- ; CHECK-NEXT: VS1R_V $v5, $x11, implicit $v0_v1_v2_v3_v4_v5_v6 :: (store (<vscale x 1 x s64>) into %stack.0)
+ ; CHECK-NEXT: $x13 = SLLI $x12, 2
+ ; CHECK-NEXT: $x11 = ADD killed $x11, killed $x13
+ ; CHECK-NEXT: VS2R_V $v4m2, $x11, implicit $v0_v1_v2_v3_v4_v5_v6 :: (store (<vscale x 1 x s128>) into %stack.0, align 8)
+ ; CHECK-NEXT: $x12 = SLLI killed $x12, 1
; CHECK-NEXT: $x11 = ADD killed $x11, killed $x12
; CHECK-NEXT: VS1R_V $v6, killed $x11, implicit $v0_v1_v2_v3_v4_v5_v6 :: (store (<vscale x 1 x s64>) into %stack.0)
; CHECK-NEXT: $x11 = ADDI $x2, 16
- ; CHECK-NEXT: $x12 = PseudoReadVLENB
; CHECK-NEXT: $v7 = VL1RE8_V $x11 :: (load (<vscale x 1 x s64>) from %stack.0)
- ; CHECK-NEXT: $x11 = ADD killed $x11, $x12
- ; CHECK-NEXT: $v8 = VL1RE8_V $x11 :: (load (<vscale x 1 x s64>) from %stack.0)
- ; CHECK-NEXT: $x11 = ADD killed $x11, $x12
- ; CHECK-NEXT: $v9 = VL1RE8_V $x11 :: (load (<vscale x 1 x s64>) from %stack.0)
- ; CHECK-NEXT: $x11 = ADD killed $x11, $x12
- ; CHECK-NEXT: $v10 = VL1RE8_V $x11 :: (load (<vscale x 1 x s64>) from %stack.0)
- ; CHECK-NEXT: $x11 = ADD killed $x11, $x12
- ; CHECK-NEXT: $v11 = VL1RE8_V $x11 :: (load (<vscale x 1 x s64>) from %stack.0)
- ; CHECK-NEXT: $x11 = ADD killed $x11, $x12
- ; CHECK-NEXT: $v12 = VL1RE8_V $x11 :: (load (<vscale x 1 x s64>) from %stack.0)
+ ; CHECK-NEXT: $x12 = PseudoReadVLENB
+ ; CHECK-NEXT: $x11 = ADD killed $x11, killed $x12
+ ; CHECK-NEXT: $v8m4 = VL4RE8_V $x11 :: (load (<vscale x 1 x s256>) from %stack.0, align 8)
+ ; CHECK-NEXT: $x12 = SLLI killed $x12, 2
; CHECK-NEXT: $x11 = ADD killed $x11, killed $x12
- ; CHECK-NEXT: $v13 = VL1RE8_V killed $x11 :: (load (<vscale x 1 x s64>) from %stack.0)
+ ; CHECK-NEXT: $v12m2 = VL2RE8_V killed $x11 :: (load (<vscale x 1 x s128>) from %stack.0, align 8)
; CHECK-NEXT: VS1R_V killed $v8, killed renamable $x10
; CHECK-NEXT: $x10 = frame-destroy PseudoReadVLENB
; CHECK-NEXT: $x10 = frame-destroy SLLI killed $x10, 3
>From e110946ef11409d89031c49b6a3056bab9e608c5 Mon Sep 17 00:00:00 2001
From: Pengcheng Wang <wangpengcheng.pp at bytedance.com>
Date: Wed, 13 Aug 2025 12:10:14 +0800
Subject: [PATCH 2/7] Add tests for 124/241 sequences
---
llvm/test/CodeGen/RISCV/rvv/zvlsseg-spill.mir | 65 +++++++++++++++++--
1 file changed, 59 insertions(+), 6 deletions(-)
diff --git a/llvm/test/CodeGen/RISCV/rvv/zvlsseg-spill.mir b/llvm/test/CodeGen/RISCV/rvv/zvlsseg-spill.mir
index 6b1e5e08c4b38..bd248bac717e8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/zvlsseg-spill.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/zvlsseg-spill.mir
@@ -2,15 +2,15 @@
# RUN: llc -mtriple=riscv64 -mattr=+v -stop-after=prologepilog %s -o - 2>&1 | FileCheck %s
--- |
- target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128"
- target triple = "riscv64"
-
- define void @zvlsseg_spill(ptr %base, i64 %vl) {
+ define void @zvlsseg_spill_0(ptr %base, i64 %vl) {
+ ret void
+ }
+ define void @zvlsseg_spill_1(ptr %base, i64 %vl) {
ret void
}
...
---
-name: zvlsseg_spill
+name: zvlsseg_spill_0
tracksRegLiveness: true
stack:
- { id: 0, offset: 0, size: 64, alignment: 8, stack-id: scalable-vector }
@@ -18,7 +18,7 @@ body: |
bb.0:
liveins: $x10, $x11
- ; CHECK-LABEL: name: zvlsseg_spill
+ ; CHECK-LABEL: name: zvlsseg_spill_0
; CHECK: liveins: $x10, $x11
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $x2 = frame-setup ADDI $x2, -16
@@ -62,3 +62,56 @@ body: |
VS1R_V killed $v8, %0:gpr
PseudoRET
...
+
+---
+name: zvlsseg_spill_1
+tracksRegLiveness: true
+stack:
+ - { id: 0, offset: 0, size: 64, alignment: 8, stack-id: scalable-vector }
+body: |
+ bb.0:
+ liveins: $x10, $x11
+ ; CHECK-LABEL: name: zvlsseg_spill_1
+ ; CHECK: liveins: $x10, $x11
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $x2 = frame-setup ADDI $x2, -16
+ ; CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 16
+ ; CHECK-NEXT: $x12 = frame-setup PseudoReadVLENB
+ ; CHECK-NEXT: $x12 = frame-setup SLLI killed $x12, 3
+ ; CHECK-NEXT: $x2 = frame-setup SUB $x2, killed $x12
+ ; CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22
+ ; CHECK-NEXT: dead $x0 = PseudoVSETVLI killed renamable $x11, 216 /* e64, m1, ta, ma */, implicit-def $vl, implicit-def $vtype
+ ; CHECK-NEXT: $v1_v2_v3_v4_v5_v6_v7 = PseudoVLSEG7E64_V_M1 undef $v1_v2_v3_v4_v5_v6_v7, renamable $x10, $noreg, 6 /* e64 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
+ ; CHECK-NEXT: $x11 = ADDI $x2, 16
+ ; CHECK-NEXT: VS1R_V $v1, $x11, implicit $v1_v2_v3_v4_v5_v6_v7 :: (store (<vscale x 1 x s64>) into %stack.0)
+ ; CHECK-NEXT: $x12 = PseudoReadVLENB
+ ; CHECK-NEXT: $x11 = ADD killed $x11, killed $x12
+ ; CHECK-NEXT: VS2R_V $v2m2, $x11, implicit $v1_v2_v3_v4_v5_v6_v7 :: (store (<vscale x 1 x s128>) into %stack.0, align 8)
+ ; CHECK-NEXT: $x12 = SLLI killed $x12, 1
+ ; CHECK-NEXT: $x11 = ADD killed $x11, killed $x12
+ ; CHECK-NEXT: VS4R_V $v4m4, killed $x11, implicit $v1_v2_v3_v4_v5_v6_v7 :: (store (<vscale x 1 x s256>) into %stack.0, align 8)
+ ; CHECK-NEXT: $x11 = ADDI $x2, 16
+ ; CHECK-NEXT: $v10m2 = VL2RE8_V $x11 :: (load (<vscale x 1 x s128>) from %stack.0, align 8)
+ ; CHECK-NEXT: $x12 = PseudoReadVLENB
+ ; CHECK-NEXT: $x13 = SLLI $x12, 1
+ ; CHECK-NEXT: $x11 = ADD killed $x11, killed $x13
+ ; CHECK-NEXT: $v12m4 = VL4RE8_V $x11 :: (load (<vscale x 1 x s256>) from %stack.0, align 8)
+ ; CHECK-NEXT: $x12 = SLLI killed $x12, 2
+ ; CHECK-NEXT: $x11 = ADD killed $x11, killed $x12
+ ; CHECK-NEXT: $v16 = VL1RE8_V killed $x11 :: (load (<vscale x 1 x s64>) from %stack.0)
+ ; CHECK-NEXT: VS1R_V killed $v10, killed renamable $x10
+ ; CHECK-NEXT: $x10 = frame-destroy PseudoReadVLENB
+ ; CHECK-NEXT: $x10 = frame-destroy SLLI killed $x10, 3
+ ; CHECK-NEXT: $x2 = frame-destroy ADD $x2, killed $x10
+ ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa $x2, 16
+ ; CHECK-NEXT: $x2 = frame-destroy ADDI $x2, 16
+ ; CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_offset 0
+ ; CHECK-NEXT: PseudoRET
+ %0:gpr = COPY $x10
+ %1:gprnox0 = COPY $x11
+ $v1_v2_v3_v4_v5_v6_v7 = PseudoVLSEG7E64_V_M1 undef $v1_v2_v3_v4_v5_v6_v7, %0, %1, 6, 0
+ PseudoVSPILL7_M1 killed renamable $v1_v2_v3_v4_v5_v6_v7, %stack.0 :: (store (<vscale x 7 x s64>) into %stack.0, align 8)
+ renamable $v10_v11_v12_v13_v14_v15_v16 = PseudoVRELOAD7_M1 %stack.0 :: (load (<vscale x 7 x s64>) from %stack.0, align 8)
+ VS1R_V killed $v10, %0:gpr
+ PseudoRET
+...
>From cfa7f3a02dcbdcc2ae9eb0b4dee77de10e49ebc8 Mon Sep 17 00:00:00 2001
From: Pengcheng Wang <wangpengcheng.pp at bytedance.com>
Date: Wed, 13 Aug 2025 12:23:34 +0800
Subject: [PATCH 3/7] First round of addressing comments
1. Add `NumRemaining`.
2. Rename `NewSize` to `VRegSize`.
3. Add argument comments.
4. Don't create `Step` for the `ShiftAmount==0` case (a small standalone sketch of the reasoning follows below).
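
A minimal standalone sketch, not part of the patch itself, of the step computation item 4 refers to: between whole-register stores/loads the base pointer advances by PreHandledNum * VLENB bytes, and when PreHandledNum is 1 the step is VLENB itself, so no SLLI and no extra scratch register are needed. The helper name stepBytes is hypothetical and assumes group sizes are powers of two.

#include <cassert>
#include <cstdint>

// Byte distance to advance the base pointer after handling PreHandledNum
// vector registers, given VLENB (VLEN/8) in bytes.
uint64_t stepBytes(uint64_t VLENB, unsigned PreHandledNum) {
  assert(PreHandledNum != 0 && (PreHandledNum & (PreHandledNum - 1)) == 0 &&
         "register groups are 1, 2, 4 or 8 registers");
  unsigned ShiftAmount = 0;
  while ((1u << (ShiftAmount + 1)) <= PreHandledNum)
    ++ShiftAmount; // ShiftAmount == log2(PreHandledNum)
  // ShiftAmount == 0: reuse VLENB directly, no shift required.
  return ShiftAmount == 0 ? VLENB : VLENB << ShiftAmount;
}
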
---
llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp | 35 ++++++++++++---------
1 file changed, 20 insertions(+), 15 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
index 758bf64ff197c..a4ea10b2a4cff 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
@@ -390,15 +390,14 @@ void RISCVRegisterInfo::adjustReg(MachineBasicBlock &MBB,
}
static std::tuple<RISCVVType::VLMUL, const TargetRegisterClass &, unsigned>
-getSpillReloadInfo(unsigned Idx, unsigned Total, uint16_t RegEncoding,
- bool IsSpill) {
- if (Idx + 8 <= Total && RegEncoding % 8 == 0)
+getSpillReloadInfo(unsigned NumRemaining, uint16_t RegEncoding, bool IsSpill) {
+ if (NumRemaining >= 8 && RegEncoding % 8 == 0)
return {RISCVVType::LMUL_8, RISCV::VRM8RegClass,
IsSpill ? RISCV::VS8R_V : RISCV::VL8RE8_V};
- if (Idx + 4 <= Total && RegEncoding % 4 == 0)
+ if (NumRemaining >= 4 && RegEncoding % 4 == 0)
return {RISCVVType::LMUL_4, RISCV::VRM4RegClass,
IsSpill ? RISCV::VS4R_V : RISCV::VL4RE8_V};
- if (Idx + 2 <= Total && RegEncoding % 2 == 0)
+ if (NumRemaining >= 2 && RegEncoding % 2 == 0)
return {RISCVVType::LMUL_2, RISCV::VRM2RegClass,
IsSpill ? RISCV::VS2R_V : RISCV::VL2RE8_V};
return {RISCVVType::LMUL_1, RISCV::VRRegClass,
@@ -431,20 +430,21 @@ void RISCVRegisterInfo::lowerVSPILL(MachineBasicBlock::iterator II) const {
auto *OldMMO = *(II->memoperands_begin());
LocationSize OldLoc = OldMMO->getSize();
assert(OldLoc.isPrecise() && OldLoc.getValue().isKnownMultipleOf(NF));
- TypeSize NewSize = OldLoc.getValue().divideCoefficientBy(NumRegs);
+ TypeSize VRegSize = OldLoc.getValue().divideCoefficientBy(NumRegs);
Register VLENB = 0;
unsigned PreSavedNum = 0;
unsigned I = 0;
while (I != NumRegs) {
auto [LMulSaved, RegClass, Opcode] =
- getSpillReloadInfo(I, NumRegs, SrcEncoding, true);
+ getSpillReloadInfo(NumRegs - I, SrcEncoding, /*IsSpill=*/true);
auto [NumSaved, _] = RISCVVType::decodeVLMUL(LMulSaved);
if (PreSavedNum) {
- Register Step = MRI.createVirtualRegister(&RISCV::GPRRegClass);
+ Register Step;
if (auto VLEN = STI.getRealVLen()) {
const int64_t VLENB = *VLEN / 8;
int64_t Offset = VLENB * PreSavedNum;
+ Step = MRI.createVirtualRegister(&RISCV::GPRRegClass);
STI.getInstrInfo()->movImm(MBB, II, DL, Step, Offset);
} else {
if (!VLENB) {
@@ -454,10 +454,12 @@ void RISCVRegisterInfo::lowerVSPILL(MachineBasicBlock::iterator II) const {
uint32_t ShiftAmount = Log2_32(PreSavedNum);
if (ShiftAmount == 0)
Step = VLENB;
- else
+ else {
+ Step = MRI.createVirtualRegister(&RISCV::GPRRegClass);
BuildMI(MBB, II, DL, TII->get(RISCV::SLLI), Step)
.addReg(VLENB)
.addImm(ShiftAmount);
+ }
}
BuildMI(MBB, II, DL, TII->get(RISCV::ADD), NewBase)
@@ -472,7 +474,7 @@ void RISCVRegisterInfo::lowerVSPILL(MachineBasicBlock::iterator II) const {
.addReg(ActualSrcReg)
.addReg(Base, getKillRegState(I + NumSaved == NumRegs))
.addMemOperand(MF.getMachineMemOperand(OldMMO, OldMMO->getOffset(),
- NewSize * NumSaved))
+ VRegSize * NumSaved))
.addReg(SrcReg, RegState::Implicit);
PreSavedNum = NumSaved;
@@ -508,18 +510,19 @@ void RISCVRegisterInfo::lowerVRELOAD(MachineBasicBlock::iterator II) const {
auto *OldMMO = *(II->memoperands_begin());
LocationSize OldLoc = OldMMO->getSize();
assert(OldLoc.isPrecise() && OldLoc.getValue().isKnownMultipleOf(NF));
- TypeSize NewSize = OldLoc.getValue().divideCoefficientBy(NumRegs);
+ TypeSize VRegSize = OldLoc.getValue().divideCoefficientBy(NumRegs);
Register VLENB = 0;
unsigned PreReloadedNum = 0;
unsigned I = 0;
while (I != NumRegs) {
auto [LMulReloaded, RegClass, Opcode] =
- getSpillReloadInfo(I, NumRegs, DestEncoding, false);
+ getSpillReloadInfo(NumRegs - I, DestEncoding, /*IsSpill=*/false);
auto [NumReloaded, _] = RISCVVType::decodeVLMUL(LMulReloaded);
if (PreReloadedNum) {
- Register Step = MRI.createVirtualRegister(&RISCV::GPRRegClass);
+ Register Step;
if (auto VLEN = STI.getRealVLen()) {
+ Step = MRI.createVirtualRegister(&RISCV::GPRRegClass);
const int64_t VLENB = *VLEN / 8;
int64_t Offset = VLENB * PreReloadedNum;
STI.getInstrInfo()->movImm(MBB, II, DL, Step, Offset);
@@ -531,10 +534,12 @@ void RISCVRegisterInfo::lowerVRELOAD(MachineBasicBlock::iterator II) const {
uint32_t ShiftAmount = Log2_32(PreReloadedNum);
if (ShiftAmount == 0)
Step = VLENB;
- else
+ else {
+ Step = MRI.createVirtualRegister(&RISCV::GPRRegClass);
BuildMI(MBB, II, DL, TII->get(RISCV::SLLI), Step)
.addReg(VLENB)
.addImm(ShiftAmount);
+ }
}
BuildMI(MBB, II, DL, TII->get(RISCV::ADD), NewBase)
@@ -548,7 +553,7 @@ void RISCVRegisterInfo::lowerVRELOAD(MachineBasicBlock::iterator II) const {
BuildMI(MBB, II, DL, TII->get(Opcode), ActualDestReg)
.addReg(Base, getKillRegState(I + NumReloaded == NumRegs))
.addMemOperand(MF.getMachineMemOperand(OldMMO, OldMMO->getOffset(),
- NewSize * NumReloaded));
+ VRegSize * NumReloaded));
PreReloadedNum = NumReloaded;
DestEncoding += NumReloaded;
>From 8a105b9e86bba55b109229ea33265c07631cd5b8 Mon Sep 17 00:00:00 2001
From: Pengcheng Wang <wangpengcheng.pp at bytedance.com>
Date: Wed, 13 Aug 2025 14:16:17 +0800
Subject: [PATCH 4/7] Fold spill/reload implementations into one function
---
llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp | 137 +++++---------------
llvm/lib/Target/RISCV/RISCVRegisterInfo.h | 4 +-
2 files changed, 37 insertions(+), 104 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
index a4ea10b2a4cff..1075c33b7c2d5 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
@@ -404,9 +404,10 @@ getSpillReloadInfo(unsigned NumRemaining, uint16_t RegEncoding, bool IsSpill) {
IsSpill ? RISCV::VS1R_V : RISCV::VL1RE8_V};
}
-// Split a VSPILLx_Mx pseudo into multiple whole register stores separated by
-// LMUL*VLENB bytes.
-void RISCVRegisterInfo::lowerVSPILL(MachineBasicBlock::iterator II) const {
+// Split a VSPILLx_Mx/VRELOADx_Mx pseudo into multiple whole register
+// stores/loads separated by LMUL*VLENB bytes.
+void RISCVRegisterInfo::lowerSegmentSpillReload(MachineBasicBlock::iterator II,
+ bool IsSpill) const {
DebugLoc DL = II->getDebugLoc();
MachineBasicBlock &MBB = *II->getParent();
MachineFunction &MF = *MBB.getParent();
@@ -421,8 +422,8 @@ void RISCVRegisterInfo::lowerVSPILL(MachineBasicBlock::iterator II) const {
unsigned NumRegs = NF * LMUL;
assert(NumRegs <= 8 && "Invalid NF/LMUL combinations.");
- Register SrcReg = II->getOperand(0).getReg();
- uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
+ Register Reg = II->getOperand(0).getReg();
+ uint16_t RegEncoding = TRI->getEncodingValue(Reg);
Register Base = II->getOperand(1).getReg();
bool IsBaseKill = II->getOperand(1).isKill();
Register NewBase = MRI.createVirtualRegister(&RISCV::GPRRegClass);
@@ -433,17 +434,18 @@ void RISCVRegisterInfo::lowerVSPILL(MachineBasicBlock::iterator II) const {
TypeSize VRegSize = OldLoc.getValue().divideCoefficientBy(NumRegs);
Register VLENB = 0;
- unsigned PreSavedNum = 0;
+ unsigned PreHandledNum = 0;
unsigned I = 0;
while (I != NumRegs) {
- auto [LMulSaved, RegClass, Opcode] =
- getSpillReloadInfo(NumRegs - I, SrcEncoding, /*IsSpill=*/true);
- auto [NumSaved, _] = RISCVVType::decodeVLMUL(LMulSaved);
- if (PreSavedNum) {
+ auto [LMulHandled, RegClass, Opcode] =
+ getSpillReloadInfo(NumRegs - I, RegEncoding, IsSpill);
+ auto [RegNumHandled, _] = RISCVVType::decodeVLMUL(LMulHandled);
+ if (PreHandledNum) {
Register Step;
+ // Optimize for constant VLEN.
if (auto VLEN = STI.getRealVLen()) {
const int64_t VLENB = *VLEN / 8;
- int64_t Offset = VLENB * PreSavedNum;
+ int64_t Offset = VLENB * PreHandledNum;
Step = MRI.createVirtualRegister(&RISCV::GPRRegClass);
STI.getInstrInfo()->movImm(MBB, II, DL, Step, Offset);
} else {
@@ -451,7 +453,7 @@ void RISCVRegisterInfo::lowerVSPILL(MachineBasicBlock::iterator II) const {
VLENB = MRI.createVirtualRegister(&RISCV::GPRRegClass);
BuildMI(MBB, II, DL, TII->get(RISCV::PseudoReadVLENB), VLENB);
}
- uint32_t ShiftAmount = Log2_32(PreSavedNum);
+ uint32_t ShiftAmount = Log2_32(PreHandledNum);
if (ShiftAmount == 0)
Step = VLENB;
else {
@@ -468,96 +470,27 @@ void RISCVRegisterInfo::lowerVSPILL(MachineBasicBlock::iterator II) const {
Base = NewBase;
}
- MCRegister ActualSrcReg = findVRegWithEncoding(RegClass, SrcEncoding);
-
- BuildMI(MBB, II, DL, TII->get(Opcode))
- .addReg(ActualSrcReg)
- .addReg(Base, getKillRegState(I + NumSaved == NumRegs))
- .addMemOperand(MF.getMachineMemOperand(OldMMO, OldMMO->getOffset(),
- VRegSize * NumSaved))
- .addReg(SrcReg, RegState::Implicit);
-
- PreSavedNum = NumSaved;
- SrcEncoding += NumSaved;
- I += NumSaved;
- }
- II->eraseFromParent();
-}
-
-// Split a VSPILLx_Mx pseudo into multiple whole register loads separated by
-// LMUL*VLENB bytes.
-void RISCVRegisterInfo::lowerVRELOAD(MachineBasicBlock::iterator II) const {
- DebugLoc DL = II->getDebugLoc();
- MachineBasicBlock &MBB = *II->getParent();
- MachineFunction &MF = *MBB.getParent();
- MachineRegisterInfo &MRI = MF.getRegInfo();
- const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
- const TargetInstrInfo *TII = STI.getInstrInfo();
- const TargetRegisterInfo *TRI = STI.getRegisterInfo();
-
- auto ZvlssegInfo = RISCV::isRVVSpillForZvlsseg(II->getOpcode());
- unsigned NF = ZvlssegInfo->first;
- unsigned LMUL = ZvlssegInfo->second;
- unsigned NumRegs = NF * LMUL;
- assert(NumRegs <= 8 && "Invalid NF/LMUL combinations.");
-
- Register DestReg = II->getOperand(0).getReg();
- uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
- Register Base = II->getOperand(1).getReg();
- bool IsBaseKill = II->getOperand(1).isKill();
- Register NewBase = MRI.createVirtualRegister(&RISCV::GPRRegClass);
-
- auto *OldMMO = *(II->memoperands_begin());
- LocationSize OldLoc = OldMMO->getSize();
- assert(OldLoc.isPrecise() && OldLoc.getValue().isKnownMultipleOf(NF));
- TypeSize VRegSize = OldLoc.getValue().divideCoefficientBy(NumRegs);
-
- Register VLENB = 0;
- unsigned PreReloadedNum = 0;
- unsigned I = 0;
- while (I != NumRegs) {
- auto [LMulReloaded, RegClass, Opcode] =
- getSpillReloadInfo(NumRegs - I, DestEncoding, /*IsSpill=*/false);
- auto [NumReloaded, _] = RISCVVType::decodeVLMUL(LMulReloaded);
- if (PreReloadedNum) {
- Register Step;
- if (auto VLEN = STI.getRealVLen()) {
- Step = MRI.createVirtualRegister(&RISCV::GPRRegClass);
- const int64_t VLENB = *VLEN / 8;
- int64_t Offset = VLENB * PreReloadedNum;
- STI.getInstrInfo()->movImm(MBB, II, DL, Step, Offset);
- } else {
- if (!VLENB) {
- VLENB = MRI.createVirtualRegister(&RISCV::GPRRegClass);
- BuildMI(MBB, II, DL, TII->get(RISCV::PseudoReadVLENB), VLENB);
- }
- uint32_t ShiftAmount = Log2_32(PreReloadedNum);
- if (ShiftAmount == 0)
- Step = VLENB;
- else {
- Step = MRI.createVirtualRegister(&RISCV::GPRRegClass);
- BuildMI(MBB, II, DL, TII->get(RISCV::SLLI), Step)
- .addReg(VLENB)
- .addImm(ShiftAmount);
- }
- }
-
- BuildMI(MBB, II, DL, TII->get(RISCV::ADD), NewBase)
- .addReg(Base, getKillRegState(I != 0 || IsBaseKill))
- .addReg(Step, getKillRegState(true));
- Base = NewBase;
- }
-
- MCRegister ActualDestReg = findVRegWithEncoding(RegClass, DestEncoding);
+ MCRegister ActualReg = findVRegWithEncoding(RegClass, RegEncoding);
+ MachineInstrBuilder MI;
+ if (IsSpill)
+ MI = BuildMI(MBB, II, DL, TII->get(Opcode)).addReg(ActualReg);
+ else
+ MI = BuildMI(MBB, II, DL, TII->get(Opcode), ActualReg);
- BuildMI(MBB, II, DL, TII->get(Opcode), ActualDestReg)
- .addReg(Base, getKillRegState(I + NumReloaded == NumRegs))
+ MI.addReg(Base, getKillRegState(I + RegNumHandled == NumRegs))
.addMemOperand(MF.getMachineMemOperand(OldMMO, OldMMO->getOffset(),
- VRegSize * NumReloaded));
-
- PreReloadedNum = NumReloaded;
- DestEncoding += NumReloaded;
- I += NumReloaded;
+ VRegSize * RegNumHandled));
+
+ // Add an implicit use of the super register to indicate that we are using
+ // part of it; this prevents the machine verifier from complaining when
+ // part of the subreg is undef. See the comment in
+ // MachineVerifier::checkLiveness for more detail.
+ if (IsSpill)
+ MI.addReg(Reg, RegState::Implicit);
+
+ PreHandledNum = RegNumHandled;
+ RegEncoding += RegNumHandled;
+ I += RegNumHandled;
}
II->eraseFromParent();
}
@@ -661,7 +594,7 @@ bool RISCVRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
case RISCV::PseudoVSPILL6_M1:
case RISCV::PseudoVSPILL7_M1:
case RISCV::PseudoVSPILL8_M1:
- lowerVSPILL(II);
+ lowerSegmentSpillReload(II, /*IsSpill=*/true);
return true;
case RISCV::PseudoVRELOAD2_M1:
case RISCV::PseudoVRELOAD2_M2:
@@ -674,7 +607,7 @@ bool RISCVRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
case RISCV::PseudoVRELOAD6_M1:
case RISCV::PseudoVRELOAD7_M1:
case RISCV::PseudoVRELOAD8_M1:
- lowerVRELOAD(II);
+ lowerSegmentSpillReload(II, /*IsSpill=*/false);
return true;
}
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.h b/llvm/lib/Target/RISCV/RISCVRegisterInfo.h
index ffb4f84afb9a3..2810139bf52ea 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.h
@@ -107,8 +107,8 @@ struct RISCVRegisterInfo : public RISCVGenRegisterInfo {
int64_t getFrameIndexInstrOffset(const MachineInstr *MI,
int Idx) const override;
- void lowerVSPILL(MachineBasicBlock::iterator II) const;
- void lowerVRELOAD(MachineBasicBlock::iterator II) const;
+ void lowerSegmentSpillReload(MachineBasicBlock::iterator II,
+ bool IsSpill) const;
Register getFrameRegister(const MachineFunction &MF) const override;
>From eec2cfe0ed20f255c3e0436ac7990941e1f29080 Mon Sep 17 00:00:00 2001
From: Pengcheng Wang <wangpengcheng.pp at bytedance.com>
Date: Wed, 13 Aug 2025 14:24:34 +0800
Subject: [PATCH 5/7] Inline const VLENB
---
llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
index 1075c33b7c2d5..d5cea64b6842d 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
@@ -444,8 +444,7 @@ void RISCVRegisterInfo::lowerSegmentSpillReload(MachineBasicBlock::iterator II,
Register Step;
// Optimize for constant VLEN.
if (auto VLEN = STI.getRealVLen()) {
- const int64_t VLENB = *VLEN / 8;
- int64_t Offset = VLENB * PreHandledNum;
+ int64_t Offset = *VLEN / 8 * PreHandledNum;
Step = MRI.createVirtualRegister(&RISCV::GPRRegClass);
STI.getInstrInfo()->movImm(MBB, II, DL, Step, Offset);
} else {
>From bd690f8389af65f39ccab0a0590dca2c56b31076 Mon Sep 17 00:00:00 2001
From: Pengcheng Wang <wangpengcheng.pp at bytedance.com>
Date: Wed, 13 Aug 2025 14:25:06 +0800
Subject: [PATCH 6/7] Set IsKill for last step reg
---
llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp | 2 +-
llvm/test/CodeGen/RISCV/rvv/zvlsseg-spill.mir | 4 ++--
2 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
index d5cea64b6842d..1f2acb97f26dd 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
@@ -465,7 +465,7 @@ void RISCVRegisterInfo::lowerSegmentSpillReload(MachineBasicBlock::iterator II,
BuildMI(MBB, II, DL, TII->get(RISCV::ADD), NewBase)
.addReg(Base, getKillRegState(I != 0 || IsBaseKill))
- .addReg(Step, getKillRegState(true));
+ .addReg(Step, getKillRegState(I + RegNumHandled == NumRegs));
Base = NewBase;
}
diff --git a/llvm/test/CodeGen/RISCV/rvv/zvlsseg-spill.mir b/llvm/test/CodeGen/RISCV/rvv/zvlsseg-spill.mir
index bd248bac717e8..dd9960d17af43 100644
--- a/llvm/test/CodeGen/RISCV/rvv/zvlsseg-spill.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/zvlsseg-spill.mir
@@ -41,7 +41,7 @@ body: |
; CHECK-NEXT: $x11 = ADDI $x2, 16
; CHECK-NEXT: $v7 = VL1RE8_V $x11 :: (load (<vscale x 1 x s64>) from %stack.0)
; CHECK-NEXT: $x12 = PseudoReadVLENB
- ; CHECK-NEXT: $x11 = ADD killed $x11, killed $x12
+ ; CHECK-NEXT: $x11 = ADD killed $x11, $x12
; CHECK-NEXT: $v8m4 = VL4RE8_V $x11 :: (load (<vscale x 1 x s256>) from %stack.0, align 8)
; CHECK-NEXT: $x12 = SLLI killed $x12, 2
; CHECK-NEXT: $x11 = ADD killed $x11, killed $x12
@@ -85,7 +85,7 @@ body: |
; CHECK-NEXT: $x11 = ADDI $x2, 16
; CHECK-NEXT: VS1R_V $v1, $x11, implicit $v1_v2_v3_v4_v5_v6_v7 :: (store (<vscale x 1 x s64>) into %stack.0)
; CHECK-NEXT: $x12 = PseudoReadVLENB
- ; CHECK-NEXT: $x11 = ADD killed $x11, killed $x12
+ ; CHECK-NEXT: $x11 = ADD killed $x11, $x12
; CHECK-NEXT: VS2R_V $v2m2, $x11, implicit $v1_v2_v3_v4_v5_v6_v7 :: (store (<vscale x 1 x s128>) into %stack.0, align 8)
; CHECK-NEXT: $x12 = SLLI killed $x12, 1
; CHECK-NEXT: $x11 = ADD killed $x11, killed $x12
>From 6c77b9a8917eebfbe1e13aeaf98bed79dabdce7e Mon Sep 17 00:00:00 2001
From: Pengcheng Wang <wangpengcheng.pp at bytedance.com>
Date: Thu, 14 Aug 2025 19:46:57 +0800
Subject: [PATCH 7/7] Update IsKill state
---
llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp | 5 +++--
llvm/test/CodeGen/RISCV/rvv/zvlsseg-spill.mir | 4 ++--
2 files changed, 5 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
index 1f2acb97f26dd..36150cb40740f 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
@@ -458,14 +458,15 @@ void RISCVRegisterInfo::lowerSegmentSpillReload(MachineBasicBlock::iterator II,
else {
Step = MRI.createVirtualRegister(&RISCV::GPRRegClass);
BuildMI(MBB, II, DL, TII->get(RISCV::SLLI), Step)
- .addReg(VLENB)
+ .addReg(VLENB, getKillRegState(I + RegNumHandled == NumRegs))
.addImm(ShiftAmount);
}
}
BuildMI(MBB, II, DL, TII->get(RISCV::ADD), NewBase)
.addReg(Base, getKillRegState(I != 0 || IsBaseKill))
- .addReg(Step, getKillRegState(I + RegNumHandled == NumRegs));
+ .addReg(Step, getKillRegState(Step != VLENB ||
+ I + RegNumHandled == NumRegs));
Base = NewBase;
}
diff --git a/llvm/test/CodeGen/RISCV/rvv/zvlsseg-spill.mir b/llvm/test/CodeGen/RISCV/rvv/zvlsseg-spill.mir
index dd9960d17af43..bd248bac717e8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/zvlsseg-spill.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/zvlsseg-spill.mir
@@ -41,7 +41,7 @@ body: |
; CHECK-NEXT: $x11 = ADDI $x2, 16
; CHECK-NEXT: $v7 = VL1RE8_V $x11 :: (load (<vscale x 1 x s64>) from %stack.0)
; CHECK-NEXT: $x12 = PseudoReadVLENB
- ; CHECK-NEXT: $x11 = ADD killed $x11, $x12
+ ; CHECK-NEXT: $x11 = ADD killed $x11, killed $x12
; CHECK-NEXT: $v8m4 = VL4RE8_V $x11 :: (load (<vscale x 1 x s256>) from %stack.0, align 8)
; CHECK-NEXT: $x12 = SLLI killed $x12, 2
; CHECK-NEXT: $x11 = ADD killed $x11, killed $x12
@@ -85,7 +85,7 @@ body: |
; CHECK-NEXT: $x11 = ADDI $x2, 16
; CHECK-NEXT: VS1R_V $v1, $x11, implicit $v1_v2_v3_v4_v5_v6_v7 :: (store (<vscale x 1 x s64>) into %stack.0)
; CHECK-NEXT: $x12 = PseudoReadVLENB
- ; CHECK-NEXT: $x11 = ADD killed $x11, $x12
+ ; CHECK-NEXT: $x11 = ADD killed $x11, killed $x12
; CHECK-NEXT: VS2R_V $v2m2, $x11, implicit $v1_v2_v3_v4_v5_v6_v7 :: (store (<vscale x 1 x s128>) into %stack.0, align 8)
; CHECK-NEXT: $x12 = SLLI killed $x12, 1
; CHECK-NEXT: $x11 = ADD killed $x11, killed $x12