[llvm] aa8d33a - [RISCV] Spilling for Zvlsseg registers.
Hsiangkai Wang via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 18 17:07:29 PDT 2021
Author: Hsiangkai Wang
Date: 2021-03-19T07:46:16+08:00
New Revision: aa8d33a6d6346e1ed444a59d0655f4a43ba96875
URL: https://github.com/llvm/llvm-project/commit/aa8d33a6d6346e1ed444a59d0655f4a43ba96875
DIFF: https://github.com/llvm/llvm-project/commit/aa8d33a6d6346e1ed444a59d0655f4a43ba96875.diff
LOG: [RISCV] Spilling for Zvlsseg registers.
For Zvlsseg, we create several tuple register classes. When spilling or
reloading one of these tuple register classes, we need to iterate NF times,
storing or loading one subregister of the tuple per iteration.
Differential Revision: https://reviews.llvm.org/D98629
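As an illustration (a sketch with example register assignments, not output
copied verbatim from the compiler), a PseudoVSPILL2_M1 spill of a two-field
LMUL=1 tuple held in v0_v1 expands into a sequence of the following shape,
where a0 is the slot's base address and a1 holds the vlenb-scaled size of one
field's slot, materialized during frame index elimination:

    vs1r.v  v0, (a0)       # store field 0 of the tuple
    add     a0, a0, a1     # advance the base by one field's slot size
    vs1r.v  v1, (a0)       # store field 1 of the tuple

A reload expands analogously, issuing one whole-register load per field. The
new rv32/rv64 spill tests added below check sequences of this shape.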
Added:
llvm/test/CodeGen/RISCV/rvv/rv32-spill-zvlsseg.ll
llvm/test/CodeGen/RISCV/rvv/rv64-spill-zvlsseg.ll
Modified:
llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
llvm/lib/Target/RISCV/RISCVInstrInfo.h
llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
index ec9a39569952..581f26c64abc 100644
--- a/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp
@@ -62,6 +62,8 @@ class RISCVExpandPseudo : public MachineFunctionPass {
bool expandVSetVL(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI);
bool expandVMSET_VMCLR(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI, unsigned Opcode);
+ bool expandVSPILL(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI);
+ bool expandVRELOAD(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI);
};
char RISCVExpandPseudo::ID = 0;
@@ -123,6 +125,30 @@ bool RISCVExpandPseudo::expandMI(MachineBasicBlock &MBB,
case RISCV::PseudoVMSET_M_B64:
// vmset.m vd => vmxnor.mm vd, vd, vd
return expandVMSET_VMCLR(MBB, MBBI, RISCV::VMXNOR_MM);
+ case RISCV::PseudoVSPILL2_M1:
+ case RISCV::PseudoVSPILL2_M2:
+ case RISCV::PseudoVSPILL2_M4:
+ case RISCV::PseudoVSPILL3_M1:
+ case RISCV::PseudoVSPILL3_M2:
+ case RISCV::PseudoVSPILL4_M1:
+ case RISCV::PseudoVSPILL4_M2:
+ case RISCV::PseudoVSPILL5_M1:
+ case RISCV::PseudoVSPILL6_M1:
+ case RISCV::PseudoVSPILL7_M1:
+ case RISCV::PseudoVSPILL8_M1:
+ return expandVSPILL(MBB, MBBI);
+ case RISCV::PseudoVRELOAD2_M1:
+ case RISCV::PseudoVRELOAD2_M2:
+ case RISCV::PseudoVRELOAD2_M4:
+ case RISCV::PseudoVRELOAD3_M1:
+ case RISCV::PseudoVRELOAD3_M2:
+ case RISCV::PseudoVRELOAD4_M1:
+ case RISCV::PseudoVRELOAD4_M2:
+ case RISCV::PseudoVRELOAD5_M1:
+ case RISCV::PseudoVRELOAD6_M1:
+ case RISCV::PseudoVRELOAD7_M1:
+ case RISCV::PseudoVRELOAD8_M1:
+ return expandVRELOAD(MBB, MBBI);
}
return false;
@@ -253,6 +279,96 @@ bool RISCVExpandPseudo::expandVMSET_VMCLR(MachineBasicBlock &MBB,
return true;
}
+bool RISCVExpandPseudo::expandVSPILL(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI) {
+ const TargetRegisterInfo *TRI =
+ MBB.getParent()->getSubtarget().getRegisterInfo();
+ DebugLoc DL = MBBI->getDebugLoc();
+ Register SrcReg = MBBI->getOperand(0).getReg();
+ Register Base = MBBI->getOperand(1).getReg();
+ Register VL = MBBI->getOperand(2).getReg();
+ auto ZvlssegInfo = TII->isRVVSpillForZvlsseg(MBBI->getOpcode());
+ if (!ZvlssegInfo)
+ return false;
+ unsigned NF = ZvlssegInfo->first;
+ unsigned LMUL = ZvlssegInfo->second;
+ assert(NF * LMUL <= 8 && "Invalid NF/LMUL combinations.");
+ unsigned Opcode = RISCV::VS1R_V;
+ unsigned SubRegIdx = RISCV::sub_vrm1_0;
+ static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
+ "Unexpected subreg numbering");
+ if (LMUL == 2) {
+ Opcode = RISCV::VS2R_V;
+ SubRegIdx = RISCV::sub_vrm2_0;
+ static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
+ "Unexpected subreg numbering");
+ } else if (LMUL == 4) {
+ Opcode = RISCV::VS4R_V;
+ SubRegIdx = RISCV::sub_vrm4_0;
+ static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
+ "Unexpected subreg numbering");
+ } else
+ assert(LMUL == 1 && "LMUL must be 1, 2, or 4.");
+
+ for (unsigned I = 0; I < NF; ++I) {
+ BuildMI(MBB, MBBI, DL, TII->get(Opcode))
+ .addReg(TRI->getSubReg(SrcReg, SubRegIdx + I))
+ .addReg(Base)
+ .addMemOperand(*(MBBI->memoperands_begin()));
+ if (I != NF - 1)
+ BuildMI(MBB, MBBI, DL, TII->get(RISCV::ADD), Base)
+ .addReg(Base)
+ .addReg(VL);
+ }
+ MBBI->eraseFromParent();
+ return true;
+}
+
+bool RISCVExpandPseudo::expandVRELOAD(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI) {
+ const TargetRegisterInfo *TRI =
+ MBB.getParent()->getSubtarget().getRegisterInfo();
+ DebugLoc DL = MBBI->getDebugLoc();
+ Register DestReg = MBBI->getOperand(0).getReg();
+ Register Base = MBBI->getOperand(1).getReg();
+ Register VL = MBBI->getOperand(2).getReg();
+ auto ZvlssegInfo = TII->isRVVSpillForZvlsseg(MBBI->getOpcode());
+ if (!ZvlssegInfo)
+ return false;
+ unsigned NF = ZvlssegInfo->first;
+ unsigned LMUL = ZvlssegInfo->second;
+ assert(NF * LMUL <= 8 && "Invalid NF/LMUL combinations.");
+ unsigned Opcode = RISCV::VL1RE8_V;
+ unsigned SubRegIdx = RISCV::sub_vrm1_0;
+ static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
+ "Unexpected subreg numbering");
+ if (LMUL == 2) {
+ Opcode = RISCV::VL2RE8_V;
+ SubRegIdx = RISCV::sub_vrm2_0;
+ static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
+ "Unexpected subreg numbering");
+ } else if (LMUL == 4) {
+ Opcode = RISCV::VL4RE8_V;
+ SubRegIdx = RISCV::sub_vrm4_0;
+ static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
+ "Unexpected subreg numbering");
+ } else
+ assert(LMUL == 1 && "LMUL must be 1, 2, or 4.");
+
+ for (unsigned I = 0; I < NF; ++I) {
+ BuildMI(MBB, MBBI, DL, TII->get(Opcode),
+ TRI->getSubReg(DestReg, SubRegIdx + I))
+ .addReg(Base)
+ .addMemOperand(*(MBBI->memoperands_begin()));
+ if (I != NF - 1)
+ BuildMI(MBB, MBBI, DL, TII->get(RISCV::ADD), Base)
+ .addReg(Base)
+ .addReg(VL);
+ }
+ MBBI->eraseFromParent();
+ return true;
+}
+
} // end of anonymous namespace
INITIALIZE_PASS(RISCVExpandPseudo, "riscv-expand-pseudo",
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index a2ce3597be8f..7d205d76b55c 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -167,29 +167,56 @@ void RISCVInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
MachineFrameInfo &MFI = MF->getFrameInfo();
unsigned Opcode;
- bool IsScalableVector = false;
- if (RISCV::GPRRegClass.hasSubClassEq(RC))
+ bool IsScalableVector = true;
+ bool IsZvlsseg = true;
+ if (RISCV::GPRRegClass.hasSubClassEq(RC)) {
Opcode = TRI->getRegSizeInBits(RISCV::GPRRegClass) == 32 ?
RISCV::SW : RISCV::SD;
- else if (RISCV::FPR16RegClass.hasSubClassEq(RC))
+ IsScalableVector = false;
+ } else if (RISCV::FPR16RegClass.hasSubClassEq(RC)) {
Opcode = RISCV::FSH;
- else if (RISCV::FPR32RegClass.hasSubClassEq(RC))
+ IsScalableVector = false;
+ } else if (RISCV::FPR32RegClass.hasSubClassEq(RC)) {
Opcode = RISCV::FSW;
- else if (RISCV::FPR64RegClass.hasSubClassEq(RC))
+ IsScalableVector = false;
+ } else if (RISCV::FPR64RegClass.hasSubClassEq(RC)) {
Opcode = RISCV::FSD;
- else if (RISCV::VRRegClass.hasSubClassEq(RC)) {
+ IsScalableVector = false;
+ } else if (RISCV::VRRegClass.hasSubClassEq(RC)) {
Opcode = RISCV::PseudoVSPILL_M1;
- IsScalableVector = true;
+ IsZvlsseg = false;
} else if (RISCV::VRM2RegClass.hasSubClassEq(RC)) {
Opcode = RISCV::PseudoVSPILL_M2;
- IsScalableVector = true;
+ IsZvlsseg = false;
} else if (RISCV::VRM4RegClass.hasSubClassEq(RC)) {
Opcode = RISCV::PseudoVSPILL_M4;
- IsScalableVector = true;
+ IsZvlsseg = false;
} else if (RISCV::VRM8RegClass.hasSubClassEq(RC)) {
Opcode = RISCV::PseudoVSPILL_M8;
- IsScalableVector = true;
- } else
+ IsZvlsseg = false;
+ } else if (RISCV::VRN2M1RegClass.hasSubClassEq(RC))
+ Opcode = RISCV::PseudoVSPILL2_M1;
+ else if (RISCV::VRN2M2RegClass.hasSubClassEq(RC))
+ Opcode = RISCV::PseudoVSPILL2_M2;
+ else if (RISCV::VRN2M4RegClass.hasSubClassEq(RC))
+ Opcode = RISCV::PseudoVSPILL2_M4;
+ else if (RISCV::VRN3M1RegClass.hasSubClassEq(RC))
+ Opcode = RISCV::PseudoVSPILL3_M1;
+ else if (RISCV::VRN3M2RegClass.hasSubClassEq(RC))
+ Opcode = RISCV::PseudoVSPILL3_M2;
+ else if (RISCV::VRN4M1RegClass.hasSubClassEq(RC))
+ Opcode = RISCV::PseudoVSPILL4_M1;
+ else if (RISCV::VRN4M2RegClass.hasSubClassEq(RC))
+ Opcode = RISCV::PseudoVSPILL4_M2;
+ else if (RISCV::VRN5M1RegClass.hasSubClassEq(RC))
+ Opcode = RISCV::PseudoVSPILL5_M1;
+ else if (RISCV::VRN6M1RegClass.hasSubClassEq(RC))
+ Opcode = RISCV::PseudoVSPILL6_M1;
+ else if (RISCV::VRN7M1RegClass.hasSubClassEq(RC))
+ Opcode = RISCV::PseudoVSPILL7_M1;
+ else if (RISCV::VRN8M1RegClass.hasSubClassEq(RC))
+ Opcode = RISCV::PseudoVSPILL8_M1;
+ else
llvm_unreachable("Can't store this register to stack slot");
if (IsScalableVector) {
@@ -198,10 +225,16 @@ void RISCVInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
MemoryLocation::UnknownSize, MFI.getObjectAlign(FI));
MFI.setStackID(FI, TargetStackID::ScalableVector);
- BuildMI(MBB, I, DL, get(Opcode))
- .addReg(SrcReg, getKillRegState(IsKill))
- .addFrameIndex(FI)
- .addMemOperand(MMO);
+ auto MIB = BuildMI(MBB, I, DL, get(Opcode))
+ .addReg(SrcReg, getKillRegState(IsKill))
+ .addFrameIndex(FI)
+ .addMemOperand(MMO);
+ if (IsZvlsseg) {
+ // For spilling/reloading Zvlsseg registers, append the dummy field for
+ // the scaled vector length. The argument will be used when expanding
+ // these pseudo instructions.
+ MIB.addReg(RISCV::X0);
+ }
} else {
MachineMemOperand *MMO = MF->getMachineMemOperand(
MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOStore,
@@ -228,29 +261,56 @@ void RISCVInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineFrameInfo &MFI = MF->getFrameInfo();
unsigned Opcode;
- bool IsScalableVector = false;
- if (RISCV::GPRRegClass.hasSubClassEq(RC))
+ bool IsScalableVector = true;
+ bool IsZvlsseg = true;
+ if (RISCV::GPRRegClass.hasSubClassEq(RC)) {
Opcode = TRI->getRegSizeInBits(RISCV::GPRRegClass) == 32 ?
RISCV::LW : RISCV::LD;
- else if (RISCV::FPR16RegClass.hasSubClassEq(RC))
+ IsScalableVector = false;
+ } else if (RISCV::FPR16RegClass.hasSubClassEq(RC)) {
Opcode = RISCV::FLH;
- else if (RISCV::FPR32RegClass.hasSubClassEq(RC))
+ IsScalableVector = false;
+ } else if (RISCV::FPR32RegClass.hasSubClassEq(RC)) {
Opcode = RISCV::FLW;
- else if (RISCV::FPR64RegClass.hasSubClassEq(RC))
+ IsScalableVector = false;
+ } else if (RISCV::FPR64RegClass.hasSubClassEq(RC)) {
Opcode = RISCV::FLD;
- else if (RISCV::VRRegClass.hasSubClassEq(RC)) {
+ IsScalableVector = false;
+ } else if (RISCV::VRRegClass.hasSubClassEq(RC)) {
Opcode = RISCV::PseudoVRELOAD_M1;
- IsScalableVector = true;
+ IsZvlsseg = false;
} else if (RISCV::VRM2RegClass.hasSubClassEq(RC)) {
Opcode = RISCV::PseudoVRELOAD_M2;
- IsScalableVector = true;
+ IsZvlsseg = false;
} else if (RISCV::VRM4RegClass.hasSubClassEq(RC)) {
Opcode = RISCV::PseudoVRELOAD_M4;
- IsScalableVector = true;
+ IsZvlsseg = false;
} else if (RISCV::VRM8RegClass.hasSubClassEq(RC)) {
Opcode = RISCV::PseudoVRELOAD_M8;
- IsScalableVector = true;
- } else
+ IsZvlsseg = false;
+ } else if (RISCV::VRN2M1RegClass.hasSubClassEq(RC))
+ Opcode = RISCV::PseudoVRELOAD2_M1;
+ else if (RISCV::VRN2M2RegClass.hasSubClassEq(RC))
+ Opcode = RISCV::PseudoVRELOAD2_M2;
+ else if (RISCV::VRN2M4RegClass.hasSubClassEq(RC))
+ Opcode = RISCV::PseudoVRELOAD2_M4;
+ else if (RISCV::VRN3M1RegClass.hasSubClassEq(RC))
+ Opcode = RISCV::PseudoVRELOAD3_M1;
+ else if (RISCV::VRN3M2RegClass.hasSubClassEq(RC))
+ Opcode = RISCV::PseudoVRELOAD3_M2;
+ else if (RISCV::VRN4M1RegClass.hasSubClassEq(RC))
+ Opcode = RISCV::PseudoVRELOAD4_M1;
+ else if (RISCV::VRN4M2RegClass.hasSubClassEq(RC))
+ Opcode = RISCV::PseudoVRELOAD4_M2;
+ else if (RISCV::VRN5M1RegClass.hasSubClassEq(RC))
+ Opcode = RISCV::PseudoVRELOAD5_M1;
+ else if (RISCV::VRN6M1RegClass.hasSubClassEq(RC))
+ Opcode = RISCV::PseudoVRELOAD6_M1;
+ else if (RISCV::VRN7M1RegClass.hasSubClassEq(RC))
+ Opcode = RISCV::PseudoVRELOAD7_M1;
+ else if (RISCV::VRN8M1RegClass.hasSubClassEq(RC))
+ Opcode = RISCV::PseudoVRELOAD8_M1;
+ else
llvm_unreachable("Can't load this register from stack slot");
if (IsScalableVector) {
@@ -259,9 +319,15 @@ void RISCVInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
MemoryLocation::UnknownSize, MFI.getObjectAlign(FI));
MFI.setStackID(FI, TargetStackID::ScalableVector);
- BuildMI(MBB, I, DL, get(Opcode), DstReg)
- .addFrameIndex(FI)
- .addMemOperand(MMO);
+ auto MIB = BuildMI(MBB, I, DL, get(Opcode), DstReg)
+ .addFrameIndex(FI)
+ .addMemOperand(MMO);
+ if (IsZvlsseg) {
+ // For spilling/reloading Zvlsseg registers, append the dummy field for
+ // the scaled vector length. The argument will be used when expanding
+ // these pseudo instructions.
+ MIB.addReg(RISCV::X0);
+ }
} else {
MachineMemOperand *MMO = MF->getMachineMemOperand(
MachinePointerInfo::getFixedStack(*MF, FI), MachineMemOperand::MOLoad,
@@ -1217,3 +1283,44 @@ Register RISCVInstrInfo::getVLENFactoredAmount(MachineFunction &MF,
return FactorRegister;
}
+
+Optional<std::pair<unsigned, unsigned>>
+RISCVInstrInfo::isRVVSpillForZvlsseg(unsigned Opcode) const {
+ switch (Opcode) {
+ default:
+ return None;
+ case RISCV::PseudoVSPILL2_M1:
+ case RISCV::PseudoVRELOAD2_M1:
+ return std::make_pair(2u, 1u);
+ case RISCV::PseudoVSPILL2_M2:
+ case RISCV::PseudoVRELOAD2_M2:
+ return std::make_pair(2u, 2u);
+ case RISCV::PseudoVSPILL2_M4:
+ case RISCV::PseudoVRELOAD2_M4:
+ return std::make_pair(2u, 4u);
+ case RISCV::PseudoVSPILL3_M1:
+ case RISCV::PseudoVRELOAD3_M1:
+ return std::make_pair(3u, 1u);
+ case RISCV::PseudoVSPILL3_M2:
+ case RISCV::PseudoVRELOAD3_M2:
+ return std::make_pair(3u, 2u);
+ case RISCV::PseudoVSPILL4_M1:
+ case RISCV::PseudoVRELOAD4_M1:
+ return std::make_pair(4u, 1u);
+ case RISCV::PseudoVSPILL4_M2:
+ case RISCV::PseudoVRELOAD4_M2:
+ return std::make_pair(4u, 2u);
+ case RISCV::PseudoVSPILL5_M1:
+ case RISCV::PseudoVRELOAD5_M1:
+ return std::make_pair(5u, 1u);
+ case RISCV::PseudoVSPILL6_M1:
+ case RISCV::PseudoVRELOAD6_M1:
+ return std::make_pair(6u, 1u);
+ case RISCV::PseudoVSPILL7_M1:
+ case RISCV::PseudoVRELOAD7_M1:
+ return std::make_pair(7u, 1u);
+ case RISCV::PseudoVSPILL8_M1:
+ case RISCV::PseudoVRELOAD8_M1:
+ return std::make_pair(8u, 1u);
+ }
+}
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
index f15d61ede037..ae03d121f42d 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
@@ -147,6 +147,9 @@ class RISCVInstrInfo : public RISCVGenInstrInfo {
MachineBasicBlock::iterator II,
int64_t Amount) const;
+ Optional<std::pair<unsigned, unsigned>>
+ isRVVSpillForZvlsseg(unsigned Opcode) const;
+
protected:
const RISCVSubtarget &STI;
};
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
index 006703e97f6d..583b6393581f 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -3171,6 +3171,20 @@ let hasSideEffects = 0, mayLoad = 1, mayStore = 0, isCodeGenOnly = 1 in {
def PseudoVRELOAD_M8 : VPseudo<VL8RE8_V, V_M8, (outs VRM8:$rs1), (ins GPR:$rs2)>;
}
+foreach lmul = MxList.m in {
+ foreach nf = NFSet<lmul>.L in {
+ defvar vreg = SegRegClass<lmul, nf>.RC;
+ let hasSideEffects = 0, mayLoad = 0, mayStore = 1, isCodeGenOnly = 1 in {
+ def "PseudoVSPILL" # nf # "_" # lmul.MX :
+ Pseudo<(outs), (ins vreg:$rs1, GPR:$rs2, GPR:$vlenb), []>;
+ }
+ let hasSideEffects = 0, mayLoad = 1, mayStore = 0, isCodeGenOnly = 1 in {
+ def "PseudoVRELOAD" # nf # "_" # lmul.MX :
+ Pseudo<(outs vreg:$rs1), (ins GPR:$rs2, GPR:$vlenb), []>;
+ }
+ }
+}
+
//===----------------------------------------------------------------------===//
// 6. Configuration-Setting Instructions
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
index e1cd29c49158..ad6d3af21d58 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
@@ -195,7 +195,8 @@ void RISCVRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
StackOffset Offset =
getFrameLowering(MF)->getFrameIndexReference(MF, FrameIndex, FrameReg);
bool isRVV = RISCVVPseudosTable::getPseudoInfo(MI.getOpcode()) ||
- isRVVWholeLoadStore(MI.getOpcode());
+ isRVVWholeLoadStore(MI.getOpcode()) ||
+ TII->isRVVSpillForZvlsseg(MI.getOpcode());
if (!isRVV)
Offset += StackOffset::getFixed(MI.getOperand(FIOperandNum + 1).getImm());
@@ -268,6 +269,16 @@ void RISCVRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
if (!isRVV)
MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset.getFixed());
}
+
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ auto ZvlssegInfo = TII->isRVVSpillForZvlsseg(MI.getOpcode());
+ if (ZvlssegInfo) {
+ int64_t ScalableValue = MFI.getObjectSize(FrameIndex) / ZvlssegInfo->first;
+ Register FactorRegister =
+ TII->getVLENFactoredAmount(MF, MBB, II, ScalableValue);
+ MI.getOperand(FIOperandNum + 1)
+ .ChangeToRegister(FactorRegister, /*isDef=*/false);
+ }
}
Register RISCVRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
diff --git a/llvm/test/CodeGen/RISCV/rvv/rv32-spill-zvlsseg.ll b/llvm/test/CodeGen/RISCV/rvv/rv32-spill-zvlsseg.ll
new file mode 100644
index 000000000000..d549c03d9d02
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/rv32-spill-zvlsseg.ll
@@ -0,0 +1,299 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -mattr=+m -O0 < %s \
+; RUN: | FileCheck --check-prefix=SPILL-O0 %s
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -mattr=+m -O2 < %s \
+; RUN: | FileCheck --check-prefix=SPILL-O2 %s
+
+define <vscale x 1 x i32> @spill_zvlsseg_nxv1i32(i32* %base, i32 %vl) nounwind {
+; SPILL-O0-LABEL: spill_zvlsseg_nxv1i32:
+; SPILL-O0: # %bb.0: # %entry
+; SPILL-O0-NEXT: addi sp, sp, -16
+; SPILL-O0-NEXT: csrr a2, vlenb
+; SPILL-O0-NEXT: sub sp, sp, a2
+; SPILL-O0-NEXT: vsetvli a1, a1, e32,mf2,ta,mu
+; SPILL-O0-NEXT: vlseg2e32.v v0, (a0)
+; SPILL-O0-NEXT: vmv1r.v v25, v1
+; SPILL-O0-NEXT: addi a0, sp, 16
+; SPILL-O0-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill
+; SPILL-O0-NEXT: #APP
+; SPILL-O0-NEXT: #NO_APP
+; SPILL-O0-NEXT: addi a0, sp, 16
+; SPILL-O0-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: add sp, sp, a0
+; SPILL-O0-NEXT: addi sp, sp, 16
+; SPILL-O0-NEXT: ret
+;
+; SPILL-O2-LABEL: spill_zvlsseg_nxv1i32:
+; SPILL-O2: # %bb.0: # %entry
+; SPILL-O2-NEXT: addi sp, sp, -16
+; SPILL-O2-NEXT: csrr a2, vlenb
+; SPILL-O2-NEXT: slli a2, a2, 1
+; SPILL-O2-NEXT: sub sp, sp, a2
+; SPILL-O2-NEXT: vsetvli a1, a1, e32,mf2,ta,mu
+; SPILL-O2-NEXT: vlseg2e32.v v0, (a0)
+; SPILL-O2-NEXT: addi a0, sp, 16
+; SPILL-O2-NEXT: csrr a1, vlenb
+; SPILL-O2-NEXT: vs1r.v v0, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT: add a0, a0, a1
+; SPILL-O2-NEXT: vs1r.v v1, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT: #APP
+; SPILL-O2-NEXT: #NO_APP
+; SPILL-O2-NEXT: addi a0, sp, 16
+; SPILL-O2-NEXT: csrr a1, vlenb
+; SPILL-O2-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: add a0, a0, a1
+; SPILL-O2-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: # kill: def $v8 killed $v8 killed $v7_v8
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a0, a0, 1
+; SPILL-O2-NEXT: add sp, sp, a0
+; SPILL-O2-NEXT: addi sp, sp, 16
+; SPILL-O2-NEXT: ret
+entry:
+ %0 = tail call {<vscale x 1 x i32>,<vscale x 1 x i32>} @llvm.riscv.vlseg2.nxv1i32(i32* %base, i32 %vl)
+ call void asm sideeffect "",
+ "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"()
+ %1 = extractvalue {<vscale x 1 x i32>,<vscale x 1 x i32>} %0, 1
+ ret <vscale x 1 x i32> %1
+}
+
+define <vscale x 2 x i32> @spill_zvlsseg_nxv2i32(i32* %base, i32 %vl) nounwind {
+; SPILL-O0-LABEL: spill_zvlsseg_nxv2i32:
+; SPILL-O0: # %bb.0: # %entry
+; SPILL-O0-NEXT: addi sp, sp, -16
+; SPILL-O0-NEXT: csrr a2, vlenb
+; SPILL-O0-NEXT: sub sp, sp, a2
+; SPILL-O0-NEXT: vsetvli a1, a1, e32,m1,ta,mu
+; SPILL-O0-NEXT: vlseg2e32.v v0, (a0)
+; SPILL-O0-NEXT: vmv1r.v v25, v1
+; SPILL-O0-NEXT: addi a0, sp, 16
+; SPILL-O0-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill
+; SPILL-O0-NEXT: #APP
+; SPILL-O0-NEXT: #NO_APP
+; SPILL-O0-NEXT: addi a0, sp, 16
+; SPILL-O0-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: add sp, sp, a0
+; SPILL-O0-NEXT: addi sp, sp, 16
+; SPILL-O0-NEXT: ret
+;
+; SPILL-O2-LABEL: spill_zvlsseg_nxv2i32:
+; SPILL-O2: # %bb.0: # %entry
+; SPILL-O2-NEXT: addi sp, sp, -16
+; SPILL-O2-NEXT: csrr a2, vlenb
+; SPILL-O2-NEXT: slli a2, a2, 1
+; SPILL-O2-NEXT: sub sp, sp, a2
+; SPILL-O2-NEXT: vsetvli a1, a1, e32,m1,ta,mu
+; SPILL-O2-NEXT: vlseg2e32.v v0, (a0)
+; SPILL-O2-NEXT: addi a0, sp, 16
+; SPILL-O2-NEXT: csrr a1, vlenb
+; SPILL-O2-NEXT: vs1r.v v0, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT: add a0, a0, a1
+; SPILL-O2-NEXT: vs1r.v v1, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT: #APP
+; SPILL-O2-NEXT: #NO_APP
+; SPILL-O2-NEXT: addi a0, sp, 16
+; SPILL-O2-NEXT: csrr a1, vlenb
+; SPILL-O2-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: add a0, a0, a1
+; SPILL-O2-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: # kill: def $v8 killed $v8 killed $v7_v8
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a0, a0, 1
+; SPILL-O2-NEXT: add sp, sp, a0
+; SPILL-O2-NEXT: addi sp, sp, 16
+; SPILL-O2-NEXT: ret
+entry:
+ %0 = tail call {<vscale x 2 x i32>,<vscale x 2 x i32>} @llvm.riscv.vlseg2.nxv2i32(i32* %base, i32 %vl)
+ call void asm sideeffect "",
+ "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"()
+ %1 = extractvalue {<vscale x 2 x i32>,<vscale x 2 x i32>} %0, 1
+ ret <vscale x 2 x i32> %1
+}
+
+define <vscale x 4 x i32> @spill_zvlsseg_nxv4i32(i32* %base, i32 %vl) nounwind {
+; SPILL-O0-LABEL: spill_zvlsseg_nxv4i32:
+; SPILL-O0: # %bb.0: # %entry
+; SPILL-O0-NEXT: addi sp, sp, -16
+; SPILL-O0-NEXT: csrr a2, vlenb
+; SPILL-O0-NEXT: slli a2, a2, 1
+; SPILL-O0-NEXT: sub sp, sp, a2
+; SPILL-O0-NEXT: vsetvli a1, a1, e32,m2,ta,mu
+; SPILL-O0-NEXT: vlseg2e32.v v0, (a0)
+; SPILL-O0-NEXT: vmv2r.v v26, v2
+; SPILL-O0-NEXT: addi a0, sp, 16
+; SPILL-O0-NEXT: vs2r.v v26, (a0) # Unknown-size Folded Spill
+; SPILL-O0-NEXT: #APP
+; SPILL-O0-NEXT: #NO_APP
+; SPILL-O0-NEXT: addi a0, sp, 16
+; SPILL-O0-NEXT: vl2re8.v v8, (a0) # Unknown-size Folded Reload
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: slli a0, a0, 1
+; SPILL-O0-NEXT: add sp, sp, a0
+; SPILL-O0-NEXT: addi sp, sp, 16
+; SPILL-O0-NEXT: ret
+;
+; SPILL-O2-LABEL: spill_zvlsseg_nxv4i32:
+; SPILL-O2: # %bb.0: # %entry
+; SPILL-O2-NEXT: addi sp, sp, -16
+; SPILL-O2-NEXT: csrr a2, vlenb
+; SPILL-O2-NEXT: slli a2, a2, 2
+; SPILL-O2-NEXT: sub sp, sp, a2
+; SPILL-O2-NEXT: vsetvli a1, a1, e32,m2,ta,mu
+; SPILL-O2-NEXT: vlseg2e32.v v0, (a0)
+; SPILL-O2-NEXT: addi a0, sp, 16
+; SPILL-O2-NEXT: csrr a1, vlenb
+; SPILL-O2-NEXT: slli a1, a1, 1
+; SPILL-O2-NEXT: vs2r.v v0, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT: add a0, a0, a1
+; SPILL-O2-NEXT: vs2r.v v2, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT: #APP
+; SPILL-O2-NEXT: #NO_APP
+; SPILL-O2-NEXT: addi a0, sp, 16
+; SPILL-O2-NEXT: csrr a1, vlenb
+; SPILL-O2-NEXT: slli a1, a1, 1
+; SPILL-O2-NEXT: vl2r.v v6, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: add a0, a0, a1
+; SPILL-O2-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: # kill: def $v8m2 killed $v8m2 killed $v6m2_v8m2
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a0, a0, 2
+; SPILL-O2-NEXT: add sp, sp, a0
+; SPILL-O2-NEXT: addi sp, sp, 16
+; SPILL-O2-NEXT: ret
+entry:
+ %0 = tail call {<vscale x 4 x i32>,<vscale x 4 x i32>} @llvm.riscv.vlseg2.nxv4i32(i32* %base, i32 %vl)
+ call void asm sideeffect "",
+ "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"()
+ %1 = extractvalue {<vscale x 4 x i32>,<vscale x 4 x i32>} %0, 1
+ ret <vscale x 4 x i32> %1
+}
+
+define <vscale x 8 x i32> @spill_zvlsseg_nxv8i32(i32* %base, i32 %vl) nounwind {
+; SPILL-O0-LABEL: spill_zvlsseg_nxv8i32:
+; SPILL-O0: # %bb.0: # %entry
+; SPILL-O0-NEXT: addi sp, sp, -16
+; SPILL-O0-NEXT: csrr a2, vlenb
+; SPILL-O0-NEXT: slli a2, a2, 2
+; SPILL-O0-NEXT: sub sp, sp, a2
+; SPILL-O0-NEXT: vsetvli a1, a1, e32,m4,ta,mu
+; SPILL-O0-NEXT: vlseg2e32.v v0, (a0)
+; SPILL-O0-NEXT: vmv4r.v v28, v4
+; SPILL-O0-NEXT: addi a0, sp, 16
+; SPILL-O0-NEXT: vs4r.v v28, (a0) # Unknown-size Folded Spill
+; SPILL-O0-NEXT: #APP
+; SPILL-O0-NEXT: #NO_APP
+; SPILL-O0-NEXT: addi a0, sp, 16
+; SPILL-O0-NEXT: vl4re8.v v8, (a0) # Unknown-size Folded Reload
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: slli a0, a0, 2
+; SPILL-O0-NEXT: add sp, sp, a0
+; SPILL-O0-NEXT: addi sp, sp, 16
+; SPILL-O0-NEXT: ret
+;
+; SPILL-O2-LABEL: spill_zvlsseg_nxv8i32:
+; SPILL-O2: # %bb.0: # %entry
+; SPILL-O2-NEXT: addi sp, sp, -16
+; SPILL-O2-NEXT: csrr a2, vlenb
+; SPILL-O2-NEXT: slli a2, a2, 3
+; SPILL-O2-NEXT: sub sp, sp, a2
+; SPILL-O2-NEXT: vsetvli a1, a1, e32,m4,ta,mu
+; SPILL-O2-NEXT: vlseg2e32.v v0, (a0)
+; SPILL-O2-NEXT: addi a0, sp, 16
+; SPILL-O2-NEXT: csrr a1, vlenb
+; SPILL-O2-NEXT: slli a1, a1, 2
+; SPILL-O2-NEXT: vs4r.v v0, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT: add a0, a0, a1
+; SPILL-O2-NEXT: vs4r.v v4, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT: #APP
+; SPILL-O2-NEXT: #NO_APP
+; SPILL-O2-NEXT: addi a0, sp, 16
+; SPILL-O2-NEXT: csrr a1, vlenb
+; SPILL-O2-NEXT: slli a1, a1, 2
+; SPILL-O2-NEXT: vl4r.v v4, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: add a0, a0, a1
+; SPILL-O2-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: # kill: def $v8m4 killed $v8m4 killed $v4m4_v8m4
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a0, a0, 3
+; SPILL-O2-NEXT: add sp, sp, a0
+; SPILL-O2-NEXT: addi sp, sp, 16
+; SPILL-O2-NEXT: ret
+entry:
+ %0 = tail call {<vscale x 8 x i32>,<vscale x 8 x i32>} @llvm.riscv.vlseg2.nxv8i32(i32* %base, i32 %vl)
+ call void asm sideeffect "",
+ "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"()
+ %1 = extractvalue {<vscale x 8 x i32>,<vscale x 8 x i32>} %0, 1
+ ret <vscale x 8 x i32> %1
+}
+
+define <vscale x 4 x i32> @spill_zvlsseg3_nxv4i32(i32* %base, i32 %vl) nounwind {
+; SPILL-O0-LABEL: spill_zvlsseg3_nxv4i32:
+; SPILL-O0: # %bb.0: # %entry
+; SPILL-O0-NEXT: addi sp, sp, -16
+; SPILL-O0-NEXT: csrr a2, vlenb
+; SPILL-O0-NEXT: slli a2, a2, 1
+; SPILL-O0-NEXT: sub sp, sp, a2
+; SPILL-O0-NEXT: vsetvli a1, a1, e32,m2,ta,mu
+; SPILL-O0-NEXT: vlseg3e32.v v0, (a0)
+; SPILL-O0-NEXT: vmv2r.v v26, v2
+; SPILL-O0-NEXT: addi a0, sp, 16
+; SPILL-O0-NEXT: vs2r.v v26, (a0) # Unknown-size Folded Spill
+; SPILL-O0-NEXT: #APP
+; SPILL-O0-NEXT: #NO_APP
+; SPILL-O0-NEXT: addi a0, sp, 16
+; SPILL-O0-NEXT: vl2re8.v v8, (a0) # Unknown-size Folded Reload
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: slli a0, a0, 1
+; SPILL-O0-NEXT: add sp, sp, a0
+; SPILL-O0-NEXT: addi sp, sp, 16
+; SPILL-O0-NEXT: ret
+;
+; SPILL-O2-LABEL: spill_zvlsseg3_nxv4i32:
+; SPILL-O2: # %bb.0: # %entry
+; SPILL-O2-NEXT: addi sp, sp, -16
+; SPILL-O2-NEXT: csrr a2, vlenb
+; SPILL-O2-NEXT: addi a3, zero, 6
+; SPILL-O2-NEXT: mul a2, a2, a3
+; SPILL-O2-NEXT: sub sp, sp, a2
+; SPILL-O2-NEXT: vsetvli a1, a1, e32,m2,ta,mu
+; SPILL-O2-NEXT: vlseg3e32.v v0, (a0)
+; SPILL-O2-NEXT: addi a0, sp, 16
+; SPILL-O2-NEXT: csrr a1, vlenb
+; SPILL-O2-NEXT: slli a1, a1, 1
+; SPILL-O2-NEXT: vs2r.v v0, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT: add a0, a0, a1
+; SPILL-O2-NEXT: vs2r.v v2, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT: add a0, a0, a1
+; SPILL-O2-NEXT: vs2r.v v4, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT: #APP
+; SPILL-O2-NEXT: #NO_APP
+; SPILL-O2-NEXT: addi a0, sp, 16
+; SPILL-O2-NEXT: csrr a1, vlenb
+; SPILL-O2-NEXT: slli a1, a1, 1
+; SPILL-O2-NEXT: vl2r.v v6, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: add a0, a0, a1
+; SPILL-O2-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: add a0, a0, a1
+; SPILL-O2-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: # kill: def $v8m2 killed $v8m2 killed $v6m2_v8m2_v10m2
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: addi a1, zero, 6
+; SPILL-O2-NEXT: mul a0, a0, a1
+; SPILL-O2-NEXT: add sp, sp, a0
+; SPILL-O2-NEXT: addi sp, sp, 16
+; SPILL-O2-NEXT: ret
+entry:
+ %0 = tail call {<vscale x 4 x i32>,<vscale x 4 x i32>,<vscale x 4 x i32>} @llvm.riscv.vlseg3.nxv4i32(i32* %base, i32 %vl)
+ call void asm sideeffect "",
+ "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"()
+ %1 = extractvalue {<vscale x 4 x i32>,<vscale x 4 x i32>,<vscale x 4 x i32>} %0, 1
+ ret <vscale x 4 x i32> %1
+}
+
+declare {<vscale x 1 x i32>,<vscale x 1 x i32>} @llvm.riscv.vlseg2.nxv1i32(i32* , i32)
+declare {<vscale x 2 x i32>,<vscale x 2 x i32>} @llvm.riscv.vlseg2.nxv2i32(i32* , i32)
+declare {<vscale x 4 x i32>,<vscale x 4 x i32>} @llvm.riscv.vlseg2.nxv4i32(i32* , i32)
+declare {<vscale x 8 x i32>,<vscale x 8 x i32>} @llvm.riscv.vlseg2.nxv8i32(i32* , i32)
+declare {<vscale x 4 x i32>,<vscale x 4 x i32>,<vscale x 4 x i32>} @llvm.riscv.vlseg3.nxv4i32(i32* , i32)
diff --git a/llvm/test/CodeGen/RISCV/rvv/rv64-spill-zvlsseg.ll b/llvm/test/CodeGen/RISCV/rvv/rv64-spill-zvlsseg.ll
new file mode 100644
index 000000000000..bbda9980380b
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/rv64-spill-zvlsseg.ll
@@ -0,0 +1,299 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -mattr=+m -O0 < %s \
+; RUN: | FileCheck --check-prefix=SPILL-O0 %s
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -mattr=+m -O2 < %s \
+; RUN: | FileCheck --check-prefix=SPILL-O2 %s
+
+define <vscale x 1 x i32> @spill_zvlsseg_nxv1i32(i32* %base, i64 %vl) nounwind {
+; SPILL-O0-LABEL: spill_zvlsseg_nxv1i32:
+; SPILL-O0: # %bb.0: # %entry
+; SPILL-O0-NEXT: addi sp, sp, -16
+; SPILL-O0-NEXT: csrr a2, vlenb
+; SPILL-O0-NEXT: sub sp, sp, a2
+; SPILL-O0-NEXT: vsetvli a1, a1, e32,mf2,ta,mu
+; SPILL-O0-NEXT: vlseg2e32.v v0, (a0)
+; SPILL-O0-NEXT: vmv1r.v v25, v1
+; SPILL-O0-NEXT: addi a0, sp, 16
+; SPILL-O0-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill
+; SPILL-O0-NEXT: #APP
+; SPILL-O0-NEXT: #NO_APP
+; SPILL-O0-NEXT: addi a0, sp, 16
+; SPILL-O0-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: add sp, sp, a0
+; SPILL-O0-NEXT: addi sp, sp, 16
+; SPILL-O0-NEXT: ret
+;
+; SPILL-O2-LABEL: spill_zvlsseg_nxv1i32:
+; SPILL-O2: # %bb.0: # %entry
+; SPILL-O2-NEXT: addi sp, sp, -16
+; SPILL-O2-NEXT: csrr a2, vlenb
+; SPILL-O2-NEXT: slli a2, a2, 1
+; SPILL-O2-NEXT: sub sp, sp, a2
+; SPILL-O2-NEXT: vsetvli a1, a1, e32,mf2,ta,mu
+; SPILL-O2-NEXT: vlseg2e32.v v0, (a0)
+; SPILL-O2-NEXT: addi a0, sp, 16
+; SPILL-O2-NEXT: csrr a1, vlenb
+; SPILL-O2-NEXT: vs1r.v v0, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT: add a0, a0, a1
+; SPILL-O2-NEXT: vs1r.v v1, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT: #APP
+; SPILL-O2-NEXT: #NO_APP
+; SPILL-O2-NEXT: addi a0, sp, 16
+; SPILL-O2-NEXT: csrr a1, vlenb
+; SPILL-O2-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: add a0, a0, a1
+; SPILL-O2-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: # kill: def $v8 killed $v8 killed $v7_v8
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a0, a0, 1
+; SPILL-O2-NEXT: add sp, sp, a0
+; SPILL-O2-NEXT: addi sp, sp, 16
+; SPILL-O2-NEXT: ret
+entry:
+ %0 = tail call {<vscale x 1 x i32>,<vscale x 1 x i32>} @llvm.riscv.vlseg2.nxv1i32(i32* %base, i64 %vl)
+ call void asm sideeffect "",
+ "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"()
+ %1 = extractvalue {<vscale x 1 x i32>,<vscale x 1 x i32>} %0, 1
+ ret <vscale x 1 x i32> %1
+}
+
+define <vscale x 2 x i32> @spill_zvlsseg_nxv2i32(i32* %base, i64 %vl) nounwind {
+; SPILL-O0-LABEL: spill_zvlsseg_nxv2i32:
+; SPILL-O0: # %bb.0: # %entry
+; SPILL-O0-NEXT: addi sp, sp, -16
+; SPILL-O0-NEXT: csrr a2, vlenb
+; SPILL-O0-NEXT: sub sp, sp, a2
+; SPILL-O0-NEXT: vsetvli a1, a1, e32,m1,ta,mu
+; SPILL-O0-NEXT: vlseg2e32.v v0, (a0)
+; SPILL-O0-NEXT: vmv1r.v v25, v1
+; SPILL-O0-NEXT: addi a0, sp, 16
+; SPILL-O0-NEXT: vs1r.v v25, (a0) # Unknown-size Folded Spill
+; SPILL-O0-NEXT: #APP
+; SPILL-O0-NEXT: #NO_APP
+; SPILL-O0-NEXT: addi a0, sp, 16
+; SPILL-O0-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: add sp, sp, a0
+; SPILL-O0-NEXT: addi sp, sp, 16
+; SPILL-O0-NEXT: ret
+;
+; SPILL-O2-LABEL: spill_zvlsseg_nxv2i32:
+; SPILL-O2: # %bb.0: # %entry
+; SPILL-O2-NEXT: addi sp, sp, -16
+; SPILL-O2-NEXT: csrr a2, vlenb
+; SPILL-O2-NEXT: slli a2, a2, 1
+; SPILL-O2-NEXT: sub sp, sp, a2
+; SPILL-O2-NEXT: vsetvli a1, a1, e32,m1,ta,mu
+; SPILL-O2-NEXT: vlseg2e32.v v0, (a0)
+; SPILL-O2-NEXT: addi a0, sp, 16
+; SPILL-O2-NEXT: csrr a1, vlenb
+; SPILL-O2-NEXT: vs1r.v v0, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT: add a0, a0, a1
+; SPILL-O2-NEXT: vs1r.v v1, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT: #APP
+; SPILL-O2-NEXT: #NO_APP
+; SPILL-O2-NEXT: addi a0, sp, 16
+; SPILL-O2-NEXT: csrr a1, vlenb
+; SPILL-O2-NEXT: vl1r.v v7, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: add a0, a0, a1
+; SPILL-O2-NEXT: vl1r.v v8, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: # kill: def $v8 killed $v8 killed $v7_v8
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a0, a0, 1
+; SPILL-O2-NEXT: add sp, sp, a0
+; SPILL-O2-NEXT: addi sp, sp, 16
+; SPILL-O2-NEXT: ret
+entry:
+ %0 = tail call {<vscale x 2 x i32>,<vscale x 2 x i32>} @llvm.riscv.vlseg2.nxv2i32(i32* %base, i64 %vl)
+ call void asm sideeffect "",
+ "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"()
+ %1 = extractvalue {<vscale x 2 x i32>,<vscale x 2 x i32>} %0, 1
+ ret <vscale x 2 x i32> %1
+}
+
+define <vscale x 4 x i32> @spill_zvlsseg_nxv4i32(i32* %base, i64 %vl) nounwind {
+; SPILL-O0-LABEL: spill_zvlsseg_nxv4i32:
+; SPILL-O0: # %bb.0: # %entry
+; SPILL-O0-NEXT: addi sp, sp, -16
+; SPILL-O0-NEXT: csrr a2, vlenb
+; SPILL-O0-NEXT: slli a2, a2, 1
+; SPILL-O0-NEXT: sub sp, sp, a2
+; SPILL-O0-NEXT: vsetvli a1, a1, e32,m2,ta,mu
+; SPILL-O0-NEXT: vlseg2e32.v v0, (a0)
+; SPILL-O0-NEXT: vmv2r.v v26, v2
+; SPILL-O0-NEXT: addi a0, sp, 16
+; SPILL-O0-NEXT: vs2r.v v26, (a0) # Unknown-size Folded Spill
+; SPILL-O0-NEXT: #APP
+; SPILL-O0-NEXT: #NO_APP
+; SPILL-O0-NEXT: addi a0, sp, 16
+; SPILL-O0-NEXT: vl2re8.v v8, (a0) # Unknown-size Folded Reload
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: slli a0, a0, 1
+; SPILL-O0-NEXT: add sp, sp, a0
+; SPILL-O0-NEXT: addi sp, sp, 16
+; SPILL-O0-NEXT: ret
+;
+; SPILL-O2-LABEL: spill_zvlsseg_nxv4i32:
+; SPILL-O2: # %bb.0: # %entry
+; SPILL-O2-NEXT: addi sp, sp, -16
+; SPILL-O2-NEXT: csrr a2, vlenb
+; SPILL-O2-NEXT: slli a2, a2, 2
+; SPILL-O2-NEXT: sub sp, sp, a2
+; SPILL-O2-NEXT: vsetvli a1, a1, e32,m2,ta,mu
+; SPILL-O2-NEXT: vlseg2e32.v v0, (a0)
+; SPILL-O2-NEXT: addi a0, sp, 16
+; SPILL-O2-NEXT: csrr a1, vlenb
+; SPILL-O2-NEXT: slli a1, a1, 1
+; SPILL-O2-NEXT: vs2r.v v0, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT: add a0, a0, a1
+; SPILL-O2-NEXT: vs2r.v v2, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT: #APP
+; SPILL-O2-NEXT: #NO_APP
+; SPILL-O2-NEXT: addi a0, sp, 16
+; SPILL-O2-NEXT: csrr a1, vlenb
+; SPILL-O2-NEXT: slli a1, a1, 1
+; SPILL-O2-NEXT: vl2r.v v6, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: add a0, a0, a1
+; SPILL-O2-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: # kill: def $v8m2 killed $v8m2 killed $v6m2_v8m2
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a0, a0, 2
+; SPILL-O2-NEXT: add sp, sp, a0
+; SPILL-O2-NEXT: addi sp, sp, 16
+; SPILL-O2-NEXT: ret
+entry:
+ %0 = tail call {<vscale x 4 x i32>,<vscale x 4 x i32>} @llvm.riscv.vlseg2.nxv4i32(i32* %base, i64 %vl)
+ call void asm sideeffect "",
+ "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"()
+ %1 = extractvalue {<vscale x 4 x i32>,<vscale x 4 x i32>} %0, 1
+ ret <vscale x 4 x i32> %1
+}
+
+define <vscale x 8 x i32> @spill_zvlsseg_nxv8i32(i32* %base, i64 %vl) nounwind {
+; SPILL-O0-LABEL: spill_zvlsseg_nxv8i32:
+; SPILL-O0: # %bb.0: # %entry
+; SPILL-O0-NEXT: addi sp, sp, -16
+; SPILL-O0-NEXT: csrr a2, vlenb
+; SPILL-O0-NEXT: slli a2, a2, 2
+; SPILL-O0-NEXT: sub sp, sp, a2
+; SPILL-O0-NEXT: vsetvli a1, a1, e32,m4,ta,mu
+; SPILL-O0-NEXT: vlseg2e32.v v0, (a0)
+; SPILL-O0-NEXT: vmv4r.v v28, v4
+; SPILL-O0-NEXT: addi a0, sp, 16
+; SPILL-O0-NEXT: vs4r.v v28, (a0) # Unknown-size Folded Spill
+; SPILL-O0-NEXT: #APP
+; SPILL-O0-NEXT: #NO_APP
+; SPILL-O0-NEXT: addi a0, sp, 16
+; SPILL-O0-NEXT: vl4re8.v v8, (a0) # Unknown-size Folded Reload
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: slli a0, a0, 2
+; SPILL-O0-NEXT: add sp, sp, a0
+; SPILL-O0-NEXT: addi sp, sp, 16
+; SPILL-O0-NEXT: ret
+;
+; SPILL-O2-LABEL: spill_zvlsseg_nxv8i32:
+; SPILL-O2: # %bb.0: # %entry
+; SPILL-O2-NEXT: addi sp, sp, -16
+; SPILL-O2-NEXT: csrr a2, vlenb
+; SPILL-O2-NEXT: slli a2, a2, 3
+; SPILL-O2-NEXT: sub sp, sp, a2
+; SPILL-O2-NEXT: vsetvli a1, a1, e32,m4,ta,mu
+; SPILL-O2-NEXT: vlseg2e32.v v0, (a0)
+; SPILL-O2-NEXT: addi a0, sp, 16
+; SPILL-O2-NEXT: csrr a1, vlenb
+; SPILL-O2-NEXT: slli a1, a1, 2
+; SPILL-O2-NEXT: vs4r.v v0, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT: add a0, a0, a1
+; SPILL-O2-NEXT: vs4r.v v4, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT: #APP
+; SPILL-O2-NEXT: #NO_APP
+; SPILL-O2-NEXT: addi a0, sp, 16
+; SPILL-O2-NEXT: csrr a1, vlenb
+; SPILL-O2-NEXT: slli a1, a1, 2
+; SPILL-O2-NEXT: vl4r.v v4, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: add a0, a0, a1
+; SPILL-O2-NEXT: vl4r.v v8, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: # kill: def $v8m4 killed $v8m4 killed $v4m4_v8m4
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: slli a0, a0, 3
+; SPILL-O2-NEXT: add sp, sp, a0
+; SPILL-O2-NEXT: addi sp, sp, 16
+; SPILL-O2-NEXT: ret
+entry:
+ %0 = tail call {<vscale x 8 x i32>,<vscale x 8 x i32>} @llvm.riscv.vlseg2.nxv8i32(i32* %base, i64 %vl)
+ call void asm sideeffect "",
+ "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"()
+ %1 = extractvalue {<vscale x 8 x i32>,<vscale x 8 x i32>} %0, 1
+ ret <vscale x 8 x i32> %1
+}
+
+define <vscale x 4 x i32> @spill_zvlsseg3_nxv4i32(i32* %base, i64 %vl) nounwind {
+; SPILL-O0-LABEL: spill_zvlsseg3_nxv4i32:
+; SPILL-O0: # %bb.0: # %entry
+; SPILL-O0-NEXT: addi sp, sp, -16
+; SPILL-O0-NEXT: csrr a2, vlenb
+; SPILL-O0-NEXT: slli a2, a2, 1
+; SPILL-O0-NEXT: sub sp, sp, a2
+; SPILL-O0-NEXT: vsetvli a1, a1, e32,m2,ta,mu
+; SPILL-O0-NEXT: vlseg3e32.v v0, (a0)
+; SPILL-O0-NEXT: vmv2r.v v26, v2
+; SPILL-O0-NEXT: addi a0, sp, 16
+; SPILL-O0-NEXT: vs2r.v v26, (a0) # Unknown-size Folded Spill
+; SPILL-O0-NEXT: #APP
+; SPILL-O0-NEXT: #NO_APP
+; SPILL-O0-NEXT: addi a0, sp, 16
+; SPILL-O0-NEXT: vl2re8.v v8, (a0) # Unknown-size Folded Reload
+; SPILL-O0-NEXT: csrr a0, vlenb
+; SPILL-O0-NEXT: slli a0, a0, 1
+; SPILL-O0-NEXT: add sp, sp, a0
+; SPILL-O0-NEXT: addi sp, sp, 16
+; SPILL-O0-NEXT: ret
+;
+; SPILL-O2-LABEL: spill_zvlsseg3_nxv4i32:
+; SPILL-O2: # %bb.0: # %entry
+; SPILL-O2-NEXT: addi sp, sp, -16
+; SPILL-O2-NEXT: csrr a2, vlenb
+; SPILL-O2-NEXT: addi a3, zero, 6
+; SPILL-O2-NEXT: mul a2, a2, a3
+; SPILL-O2-NEXT: sub sp, sp, a2
+; SPILL-O2-NEXT: vsetvli a1, a1, e32,m2,ta,mu
+; SPILL-O2-NEXT: vlseg3e32.v v0, (a0)
+; SPILL-O2-NEXT: addi a0, sp, 16
+; SPILL-O2-NEXT: csrr a1, vlenb
+; SPILL-O2-NEXT: slli a1, a1, 1
+; SPILL-O2-NEXT: vs2r.v v0, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT: add a0, a0, a1
+; SPILL-O2-NEXT: vs2r.v v2, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT: add a0, a0, a1
+; SPILL-O2-NEXT: vs2r.v v4, (a0) # Unknown-size Folded Spill
+; SPILL-O2-NEXT: #APP
+; SPILL-O2-NEXT: #NO_APP
+; SPILL-O2-NEXT: addi a0, sp, 16
+; SPILL-O2-NEXT: csrr a1, vlenb
+; SPILL-O2-NEXT: slli a1, a1, 1
+; SPILL-O2-NEXT: vl2r.v v6, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: add a0, a0, a1
+; SPILL-O2-NEXT: vl2r.v v8, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: add a0, a0, a1
+; SPILL-O2-NEXT: vl2r.v v10, (a0) # Unknown-size Folded Reload
+; SPILL-O2-NEXT: # kill: def $v8m2 killed $v8m2 killed $v6m2_v8m2_v10m2
+; SPILL-O2-NEXT: csrr a0, vlenb
+; SPILL-O2-NEXT: addi a1, zero, 6
+; SPILL-O2-NEXT: mul a0, a0, a1
+; SPILL-O2-NEXT: add sp, sp, a0
+; SPILL-O2-NEXT: addi sp, sp, 16
+; SPILL-O2-NEXT: ret
+entry:
+ %0 = tail call {<vscale x 4 x i32>,<vscale x 4 x i32>,<vscale x 4 x i32>} @llvm.riscv.vlseg3.nxv4i32(i32* %base, i64 %vl)
+ call void asm sideeffect "",
+ "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"()
+ %1 = extractvalue {<vscale x 4 x i32>,<vscale x 4 x i32>,<vscale x 4 x i32>} %0, 1
+ ret <vscale x 4 x i32> %1
+}
+
+declare {<vscale x 1 x i32>,<vscale x 1 x i32>} @llvm.riscv.vlseg2.nxv1i32(i32* , i64)
+declare {<vscale x 2 x i32>,<vscale x 2 x i32>} @llvm.riscv.vlseg2.nxv2i32(i32* , i64)
+declare {<vscale x 4 x i32>,<vscale x 4 x i32>} @llvm.riscv.vlseg2.nxv4i32(i32* , i64)
+declare {<vscale x 8 x i32>,<vscale x 8 x i32>} @llvm.riscv.vlseg2.nxv8i32(i32* , i64)
+declare {<vscale x 4 x i32>,<vscale x 4 x i32>,<vscale x 4 x i32>} @llvm.riscv.vlseg3.nxv4i32(i32* , i64)