[llvm] [RISCV] Allow VCIX with SE to reorder (PR #77049)
Brandon Wu via llvm-commits
llvm-commits at lists.llvm.org
Thu Jan 4 22:33:48 PST 2024
https://github.com/4vtomat created https://github.com/llvm/llvm-project/pull/77049
This patch sets hasSideEffects to 0 for the VCIX pseudos, which allows
VCIX instructions that have side effects to be reordered with unrelated
instructions. Ordering among the side-effecting (SE) forms themselves is
preserved by having them implicitly define and use a dummy VCIX_STATE
register.
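For illustration, a minimal sketch (the intrinsic name mangling follows
the style used in pr69586.ll and may not be exact): the two sf.vc.vv.se
calls below still execute in order relative to each other via the
VCIX_STATE dependence, but the load feeding the second call is no longer
pinned by hasSideEffects and may, for example, be hoisted above the
first call by the machine scheduler:

  define void @sketch(ptr %p, <vscale x 16 x i32> %x, <vscale x 16 x i32> %y, i64 %vl) {
    tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv16i32.nxv16i32.i64(i64 3, i64 0, <vscale x 16 x i32> %x, <vscale x 16 x i32> %y, i64 %vl)
    ; This load is independent of any VCIX state, so it can now be
    ; scheduled across the preceding sf.vc.vv.se call.
    %v = load <vscale x 16 x i32>, ptr %p
    tail call void @llvm.riscv.sf.vc.vv.se.i64.nxv16i32.nxv16i32.i64(i64 3, i64 0, <vscale x 16 x i32> %v, <vscale x 16 x i32> %y, i64 %vl)
    ret void
  }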
From 28b8a47e949e4eee2408789efe03cc1ca06a9363 Mon Sep 17 00:00:00 2001
From: Brandon Wu <brandon.wu at sifive.com>
Date: Thu, 4 Jan 2024 21:44:34 -0800
Subject: [PATCH] [RISCV] Allow VCIX with SE to reorder
This patch sets hasSideEffects to 0 for the VCIX pseudos, which allows
VCIX instructions that have side effects to be reordered with unrelated
instructions. Ordering among the side-effecting (SE) forms themselves is
preserved by having them implicitly define and use a dummy VCIX_STATE
register, as sketched below.
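The mechanism, sketched in TableGen (names simplified; the actual
definitions come from the VPseudoVC_* multiclasses in
RISCVInstrInfoXSf.td): the SE pseudos drop the unmodeled-side-effect
flag and instead carry an implicit def/use of the dummy VCIX_STATE
register, so they stay ordered with respect to each other without
acting as scheduling barriers for unrelated code:

  // Illustrative only -- in the patch, hasSideEffects = 0 is set in the
  // pseudo classes and the Defs/Uses are added in the multiclasses.
  let hasSideEffects = 0, Defs = [VCIX_STATE], Uses = [VCIX_STATE] in
  def PseudoVC_V_X_SE_M1 : VPseudoVC_V_X<payload2, VR, GPR>;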
---
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 292 ++++-
llvm/lib/Target/RISCV/RISCVISelLowering.h | 71 ++
llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td | 229 +++-
llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp | 3 +
llvm/lib/Target/RISCV/RISCVRegisterInfo.td | 3 +
llvm/test/CodeGen/RISCV/pr69586.ll | 1267 ++++++++++---------
6 files changed, 1169 insertions(+), 696 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index bc4b2b022c0ae9..512c7b340cb47b 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -8308,25 +8308,63 @@ static SDValue lowerGetVectorLength(SDNode *N, SelectionDAG &DAG,
return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), Res);
}
-static void getVCIXOperands(SDValue &Op, SelectionDAG &DAG,
- SmallVector<SDValue> &Ops) {
+static inline void promoteVCIXScalar(const SDValue &Op,
+ SmallVectorImpl<SDValue> &Operands,
+ SelectionDAG &DAG) {
+ const RISCVSubtarget &Subtarget =
+ DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
+
+ bool HasChain = Op.getOpcode() == ISD::INTRINSIC_VOID ||
+ Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
+ unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
SDLoc DL(Op);
+ const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
+ RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
+ if (!II || !II->hasScalarOperand())
+ return;
+
+ unsigned SplatOp = II->ScalarOperand + 1;
+ assert(SplatOp < Op.getNumOperands());
+
+ SDValue &ScalarOp = Operands[SplatOp];
+ MVT OpVT = ScalarOp.getSimpleValueType();
+ MVT XLenVT = Subtarget.getXLenVT();
+
+ // The code below is partially copied from lowerVectorIntrinsicScalars.
+ // If this isn't a scalar, or its type is XLenVT we're done.
+ if (!OpVT.isScalarInteger() || OpVT == XLenVT)
+ return;
+
+ // Manually emit promote operation for scalar operation.
+ if (OpVT.bitsLT(XLenVT)) {
+ unsigned ExtOpc =
+ isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
+ ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp);
+ }
+
+ return;
+}
+
+static void processVCIXOperands(SDValue &OrigOp,
+ SmallVectorImpl<SDValue> &Operands,
+ SelectionDAG &DAG) {
+ promoteVCIXScalar(OrigOp, Operands, DAG);
const RISCVSubtarget &Subtarget =
DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
- for (const SDValue &V : Op->op_values()) {
+ for (SDValue &V : Operands) {
EVT ValType = V.getValueType();
- if (ValType.isScalableVector() && ValType.isFloatingPoint()) {
+ if (ValType.isVector() && ValType.isFloatingPoint()) {
MVT InterimIVT =
MVT::getVectorVT(MVT::getIntegerVT(ValType.getScalarSizeInBits()),
ValType.getVectorElementCount());
- Ops.push_back(DAG.getBitcast(InterimIVT, V));
- } else if (ValType.isFixedLengthVector()) {
+ V = DAG.getBitcast(InterimIVT, V);
+ }
+ if (ValType.isFixedLengthVector()) {
MVT OpContainerVT = getContainerForFixedLengthVector(
DAG, V.getSimpleValueType(), Subtarget);
- Ops.push_back(convertToScalableVector(OpContainerVT, V, DAG, Subtarget));
- } else
- Ops.push_back(V);
+ V = convertToScalableVector(OpContainerVT, V, DAG, Subtarget);
+ }
}
}
@@ -8557,8 +8595,8 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
case Intrinsic::riscv_sf_vc_v_fvw: {
MVT VT = Op.getSimpleValueType();
- SmallVector<SDValue> Ops;
- getVCIXOperands(Op, DAG, Ops);
+ SmallVector<SDValue> Operands{Op->op_values()};
+ processVCIXOperands(Op, Operands, DAG);
MVT RetVT = VT;
if (VT.isFixedLengthVector())
@@ -8567,7 +8605,7 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
RetVT = MVT::getVectorVT(MVT::getIntegerVT(VT.getScalarSizeInBits()),
VT.getVectorElementCount());
- SDValue NewNode = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, RetVT, Ops);
+ SDValue NewNode = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, RetVT, Operands);
if (VT.isFixedLengthVector())
NewNode = convertFromScalableVector(VT, NewNode, DAG, Subtarget);
@@ -8584,6 +8622,52 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
}
+static inline SDValue getVCIXISDNodeWCHAIN(SDValue &Op, SelectionDAG &DAG,
+ unsigned Type) {
+ SDLoc DL(Op);
+ SmallVector<SDValue> Operands{Op->op_values()};
+ Operands.erase(Operands.begin() + 1);
+
+ const RISCVSubtarget &Subtarget =
+ DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
+ MVT VT = Op.getSimpleValueType();
+ MVT RetVT = VT;
+ MVT FloatVT = VT;
+
+ if (VT.isFloatingPoint()) {
+ RetVT = MVT::getVectorVT(MVT::getIntegerVT(VT.getScalarSizeInBits()),
+ VT.getVectorElementCount());
+ FloatVT = RetVT;
+ }
+ if (VT.isFixedLengthVector())
+ RetVT = getContainerForFixedLengthVector(DAG.getTargetLoweringInfo(), RetVT,
+ Subtarget);
+
+ processVCIXOperands(Op, Operands, DAG);
+
+ SDVTList VTs = DAG.getVTList({RetVT, MVT::Other});
+ SDValue NewNode = DAG.getNode(Type, DL, VTs, Operands);
+ SDValue Chain = NewNode.getValue(1);
+
+ if (VT.isFixedLengthVector())
+ NewNode = convertFromScalableVector(FloatVT, NewNode, DAG, Subtarget);
+ if (VT.isFloatingPoint())
+ NewNode = DAG.getBitcast(VT, NewNode);
+
+ NewNode = DAG.getMergeValues({NewNode, Chain}, DL);
+
+ return NewNode;
+}
+
+static inline SDValue getVCIXISDNodeVOID(SDValue &Op, SelectionDAG &DAG,
+ unsigned Type) {
+ SmallVector<SDValue> Operands{Op->op_values()};
+ Operands.erase(Operands.begin() + 1);
+ processVCIXOperands(Op, Operands, DAG);
+
+ return DAG.getNode(Type, SDLoc(Op), Op.getValueType(), Operands);
+}
+
SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
SelectionDAG &DAG) const {
unsigned IntNo = Op.getConstantOperandVal(1);
@@ -8701,48 +8785,33 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
return DAG.getMergeValues(Results, DL);
}
case Intrinsic::riscv_sf_vc_v_x_se:
+ return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_X_SE);
case Intrinsic::riscv_sf_vc_v_i_se:
+ return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_I_SE);
case Intrinsic::riscv_sf_vc_v_xv_se:
+ return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_XV_SE);
case Intrinsic::riscv_sf_vc_v_iv_se:
+ return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_IV_SE);
case Intrinsic::riscv_sf_vc_v_vv_se:
+ return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_VV_SE);
case Intrinsic::riscv_sf_vc_v_fv_se:
+ return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_FV_SE);
case Intrinsic::riscv_sf_vc_v_xvv_se:
+ return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_XVV_SE);
case Intrinsic::riscv_sf_vc_v_ivv_se:
+ return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_IVV_SE);
case Intrinsic::riscv_sf_vc_v_vvv_se:
+ return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_VVV_SE);
case Intrinsic::riscv_sf_vc_v_fvv_se:
+ return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_FVV_SE);
case Intrinsic::riscv_sf_vc_v_xvw_se:
+ return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_XVW_SE);
case Intrinsic::riscv_sf_vc_v_ivw_se:
+ return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_IVW_SE);
case Intrinsic::riscv_sf_vc_v_vvw_se:
- case Intrinsic::riscv_sf_vc_v_fvw_se: {
- MVT VT = Op.getSimpleValueType();
- SDLoc DL(Op);
- SmallVector<SDValue> Ops;
- getVCIXOperands(Op, DAG, Ops);
-
- MVT RetVT = VT;
- if (VT.isFixedLengthVector())
- RetVT = getContainerForFixedLengthVector(VT);
- else if (VT.isFloatingPoint())
- RetVT = MVT::getVectorVT(MVT::getIntegerVT(RetVT.getScalarSizeInBits()),
- RetVT.getVectorElementCount());
-
- SDVTList VTs = DAG.getVTList({RetVT, MVT::Other});
- SDValue NewNode = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops);
-
- if (VT.isFixedLengthVector()) {
- SDValue FixedVector =
- convertFromScalableVector(VT, NewNode, DAG, Subtarget);
- NewNode = DAG.getMergeValues({FixedVector, NewNode.getValue(1)}, DL);
- } else if (VT.isFloatingPoint()) {
- SDValue BitCast = DAG.getBitcast(VT, NewNode.getValue(0));
- NewNode = DAG.getMergeValues({BitCast, NewNode.getValue(1)}, DL);
- }
-
- if (Op == NewNode)
- break;
-
- return NewNode;
- }
+ return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_VVW_SE);
+ case Intrinsic::riscv_sf_vc_v_fvw_se:
+ return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_FVW_SE);
}
return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
@@ -8832,72 +8901,117 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
FixedIntrinsic->getMemoryVT(), FixedIntrinsic->getMemOperand());
}
case Intrinsic::riscv_sf_vc_x_se_e8mf8:
+ return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_X_SE_E8MF8);
case Intrinsic::riscv_sf_vc_x_se_e8mf4:
+ return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_X_SE_E8MF4);
case Intrinsic::riscv_sf_vc_x_se_e8mf2:
+ return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_X_SE_E8MF2);
case Intrinsic::riscv_sf_vc_x_se_e8m1:
+ return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_X_SE_E8M1);
case Intrinsic::riscv_sf_vc_x_se_e8m2:
+ return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_X_SE_E8M2);
case Intrinsic::riscv_sf_vc_x_se_e8m4:
+ return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_X_SE_E8M4);
case Intrinsic::riscv_sf_vc_x_se_e8m8:
+ return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_X_SE_E8M8);
case Intrinsic::riscv_sf_vc_x_se_e16mf4:
+ return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_X_SE_E16MF4);
case Intrinsic::riscv_sf_vc_x_se_e16mf2:
+ return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_X_SE_E16MF2);
case Intrinsic::riscv_sf_vc_x_se_e16m1:
+ return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_X_SE_E16M1);
case Intrinsic::riscv_sf_vc_x_se_e16m2:
+ return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_X_SE_E16M2);
case Intrinsic::riscv_sf_vc_x_se_e16m4:
+ return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_X_SE_E16M4);
case Intrinsic::riscv_sf_vc_x_se_e16m8:
+ return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_X_SE_E16M8);
case Intrinsic::riscv_sf_vc_x_se_e32mf2:
+ return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_X_SE_E32MF2);
case Intrinsic::riscv_sf_vc_x_se_e32m1:
+ return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_X_SE_E32M1);
case Intrinsic::riscv_sf_vc_x_se_e32m2:
+ return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_X_SE_E32M2);
case Intrinsic::riscv_sf_vc_x_se_e32m4:
+ return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_X_SE_E32M4);
case Intrinsic::riscv_sf_vc_x_se_e32m8:
+ return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_X_SE_E32M8);
case Intrinsic::riscv_sf_vc_x_se_e64m1:
+ return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_X_SE_E64M1);
case Intrinsic::riscv_sf_vc_x_se_e64m2:
+ return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_X_SE_E64M2);
case Intrinsic::riscv_sf_vc_x_se_e64m4:
+ return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_X_SE_E64M4);
case Intrinsic::riscv_sf_vc_x_se_e64m8:
+ return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_X_SE_E64M8);
case Intrinsic::riscv_sf_vc_i_se_e8mf8:
+ return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_I_SE_E8MF8);
case Intrinsic::riscv_sf_vc_i_se_e8mf4:
+ return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_I_SE_E8MF4);
case Intrinsic::riscv_sf_vc_i_se_e8mf2:
+ return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_I_SE_E8MF2);
case Intrinsic::riscv_sf_vc_i_se_e8m1:
+ return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_I_SE_E8M1);
case Intrinsic::riscv_sf_vc_i_se_e8m2:
+ return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_I_SE_E8M2);
case Intrinsic::riscv_sf_vc_i_se_e8m4:
+ return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_I_SE_E8M4);
case Intrinsic::riscv_sf_vc_i_se_e8m8:
+ return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_I_SE_E8M8);
case Intrinsic::riscv_sf_vc_i_se_e16mf4:
+ return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_I_SE_E16MF4);
case Intrinsic::riscv_sf_vc_i_se_e16mf2:
+ return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_I_SE_E16MF2);
case Intrinsic::riscv_sf_vc_i_se_e16m1:
+ return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_I_SE_E16M1);
case Intrinsic::riscv_sf_vc_i_se_e16m2:
+ return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_I_SE_E16M2);
case Intrinsic::riscv_sf_vc_i_se_e16m4:
+ return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_I_SE_E16M4);
case Intrinsic::riscv_sf_vc_i_se_e16m8:
+ return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_I_SE_E16M8);
case Intrinsic::riscv_sf_vc_i_se_e32mf2:
+ return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_I_SE_E32MF2);
case Intrinsic::riscv_sf_vc_i_se_e32m1:
+ return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_I_SE_E32M1);
case Intrinsic::riscv_sf_vc_i_se_e32m2:
+ return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_I_SE_E32M2);
case Intrinsic::riscv_sf_vc_i_se_e32m4:
+ return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_I_SE_E32M4);
case Intrinsic::riscv_sf_vc_i_se_e32m8:
+ return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_I_SE_E32M8);
case Intrinsic::riscv_sf_vc_i_se_e64m1:
+ return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_I_SE_E64M1);
case Intrinsic::riscv_sf_vc_i_se_e64m2:
+ return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_I_SE_E64M2);
case Intrinsic::riscv_sf_vc_i_se_e64m4:
+ return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_I_SE_E64M4);
case Intrinsic::riscv_sf_vc_i_se_e64m8:
+ return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_I_SE_E64M8);
case Intrinsic::riscv_sf_vc_xv_se:
+ return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XV_SE);
case Intrinsic::riscv_sf_vc_iv_se:
+ return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_IV_SE);
case Intrinsic::riscv_sf_vc_vv_se:
+ return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_VV_SE);
case Intrinsic::riscv_sf_vc_fv_se:
+ return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_FV_SE);
case Intrinsic::riscv_sf_vc_xvv_se:
+ return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XVV_SE);
case Intrinsic::riscv_sf_vc_ivv_se:
+ return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_IVV_SE);
case Intrinsic::riscv_sf_vc_vvv_se:
+ return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_VVV_SE);
case Intrinsic::riscv_sf_vc_fvv_se:
+ return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_FVV_SE);
case Intrinsic::riscv_sf_vc_xvw_se:
+ return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XVW_SE);
case Intrinsic::riscv_sf_vc_ivw_se:
+ return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_IVW_SE);
case Intrinsic::riscv_sf_vc_vvw_se:
- case Intrinsic::riscv_sf_vc_fvw_se: {
- SmallVector<SDValue> Ops;
- getVCIXOperands(Op, DAG, Ops);
-
- SDValue NewNode =
- DAG.getNode(ISD::INTRINSIC_VOID, SDLoc(Op), Op->getVTList(), Ops);
-
- if (Op == NewNode)
- break;
-
- return NewNode;
- }
+ return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_VVW_SE);
+ case Intrinsic::riscv_sf_vc_fvw_se:
+ return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_FVW_SE);
}
return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
@@ -18713,6 +18827,76 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(SWAP_CSR)
NODE_NAME_CASE(CZERO_EQZ)
NODE_NAME_CASE(CZERO_NEZ)
+ NODE_NAME_CASE(SF_VC_X_SE_E8MF8)
+ NODE_NAME_CASE(SF_VC_X_SE_E8MF4)
+ NODE_NAME_CASE(SF_VC_X_SE_E8MF2)
+ NODE_NAME_CASE(SF_VC_X_SE_E8M1)
+ NODE_NAME_CASE(SF_VC_X_SE_E8M2)
+ NODE_NAME_CASE(SF_VC_X_SE_E8M4)
+ NODE_NAME_CASE(SF_VC_X_SE_E8M8)
+ NODE_NAME_CASE(SF_VC_X_SE_E16MF4)
+ NODE_NAME_CASE(SF_VC_X_SE_E16MF2)
+ NODE_NAME_CASE(SF_VC_X_SE_E16M1)
+ NODE_NAME_CASE(SF_VC_X_SE_E16M2)
+ NODE_NAME_CASE(SF_VC_X_SE_E16M4)
+ NODE_NAME_CASE(SF_VC_X_SE_E16M8)
+ NODE_NAME_CASE(SF_VC_X_SE_E32MF2)
+ NODE_NAME_CASE(SF_VC_X_SE_E32M1)
+ NODE_NAME_CASE(SF_VC_X_SE_E32M2)
+ NODE_NAME_CASE(SF_VC_X_SE_E32M4)
+ NODE_NAME_CASE(SF_VC_X_SE_E32M8)
+ NODE_NAME_CASE(SF_VC_X_SE_E64M1)
+ NODE_NAME_CASE(SF_VC_X_SE_E64M2)
+ NODE_NAME_CASE(SF_VC_X_SE_E64M4)
+ NODE_NAME_CASE(SF_VC_X_SE_E64M8)
+ NODE_NAME_CASE(SF_VC_I_SE_E8MF8)
+ NODE_NAME_CASE(SF_VC_I_SE_E8MF4)
+ NODE_NAME_CASE(SF_VC_I_SE_E8MF2)
+ NODE_NAME_CASE(SF_VC_I_SE_E8M1)
+ NODE_NAME_CASE(SF_VC_I_SE_E8M2)
+ NODE_NAME_CASE(SF_VC_I_SE_E8M4)
+ NODE_NAME_CASE(SF_VC_I_SE_E8M8)
+ NODE_NAME_CASE(SF_VC_I_SE_E16MF4)
+ NODE_NAME_CASE(SF_VC_I_SE_E16MF2)
+ NODE_NAME_CASE(SF_VC_I_SE_E16M1)
+ NODE_NAME_CASE(SF_VC_I_SE_E16M2)
+ NODE_NAME_CASE(SF_VC_I_SE_E16M4)
+ NODE_NAME_CASE(SF_VC_I_SE_E16M8)
+ NODE_NAME_CASE(SF_VC_I_SE_E32MF2)
+ NODE_NAME_CASE(SF_VC_I_SE_E32M1)
+ NODE_NAME_CASE(SF_VC_I_SE_E32M2)
+ NODE_NAME_CASE(SF_VC_I_SE_E32M4)
+ NODE_NAME_CASE(SF_VC_I_SE_E32M8)
+ NODE_NAME_CASE(SF_VC_I_SE_E64M1)
+ NODE_NAME_CASE(SF_VC_I_SE_E64M2)
+ NODE_NAME_CASE(SF_VC_I_SE_E64M4)
+ NODE_NAME_CASE(SF_VC_I_SE_E64M8)
+ NODE_NAME_CASE(SF_VC_XV_SE)
+ NODE_NAME_CASE(SF_VC_IV_SE)
+ NODE_NAME_CASE(SF_VC_VV_SE)
+ NODE_NAME_CASE(SF_VC_FV_SE)
+ NODE_NAME_CASE(SF_VC_XVV_SE)
+ NODE_NAME_CASE(SF_VC_IVV_SE)
+ NODE_NAME_CASE(SF_VC_VVV_SE)
+ NODE_NAME_CASE(SF_VC_FVV_SE)
+ NODE_NAME_CASE(SF_VC_XVW_SE)
+ NODE_NAME_CASE(SF_VC_IVW_SE)
+ NODE_NAME_CASE(SF_VC_VVW_SE)
+ NODE_NAME_CASE(SF_VC_FVW_SE)
+ NODE_NAME_CASE(SF_VC_V_X_SE)
+ NODE_NAME_CASE(SF_VC_V_I_SE)
+ NODE_NAME_CASE(SF_VC_V_XV_SE)
+ NODE_NAME_CASE(SF_VC_V_IV_SE)
+ NODE_NAME_CASE(SF_VC_V_VV_SE)
+ NODE_NAME_CASE(SF_VC_V_FV_SE)
+ NODE_NAME_CASE(SF_VC_V_XVV_SE)
+ NODE_NAME_CASE(SF_VC_V_IVV_SE)
+ NODE_NAME_CASE(SF_VC_V_VVV_SE)
+ NODE_NAME_CASE(SF_VC_V_FVV_SE)
+ NODE_NAME_CASE(SF_VC_V_XVW_SE)
+ NODE_NAME_CASE(SF_VC_V_IVW_SE)
+ NODE_NAME_CASE(SF_VC_V_VVW_SE)
+ NODE_NAME_CASE(SF_VC_V_FVW_SE)
}
// clang-format on
return nullptr;
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index 18f58057558166..97360a8ae9e5f2 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -410,6 +410,77 @@ enum NodeType : unsigned {
STRICT_VFROUND_NOEXCEPT_VL,
LAST_RISCV_STRICTFP_OPCODE = STRICT_VFROUND_NOEXCEPT_VL,
+ SF_VC_X_SE_E8MF8,
+ SF_VC_X_SE_E8MF4,
+ SF_VC_X_SE_E8MF2,
+ SF_VC_X_SE_E8M1,
+ SF_VC_X_SE_E8M2,
+ SF_VC_X_SE_E8M4,
+ SF_VC_X_SE_E8M8,
+ SF_VC_X_SE_E16MF4,
+ SF_VC_X_SE_E16MF2,
+ SF_VC_X_SE_E16M1,
+ SF_VC_X_SE_E16M2,
+ SF_VC_X_SE_E16M4,
+ SF_VC_X_SE_E16M8,
+ SF_VC_X_SE_E32MF2,
+ SF_VC_X_SE_E32M1,
+ SF_VC_X_SE_E32M2,
+ SF_VC_X_SE_E32M4,
+ SF_VC_X_SE_E32M8,
+ SF_VC_X_SE_E64M1,
+ SF_VC_X_SE_E64M2,
+ SF_VC_X_SE_E64M4,
+ SF_VC_X_SE_E64M8,
+ SF_VC_I_SE_E8MF8,
+ SF_VC_I_SE_E8MF4,
+ SF_VC_I_SE_E8MF2,
+ SF_VC_I_SE_E8M1,
+ SF_VC_I_SE_E8M2,
+ SF_VC_I_SE_E8M4,
+ SF_VC_I_SE_E8M8,
+ SF_VC_I_SE_E16MF4,
+ SF_VC_I_SE_E16MF2,
+ SF_VC_I_SE_E16M1,
+ SF_VC_I_SE_E16M2,
+ SF_VC_I_SE_E16M4,
+ SF_VC_I_SE_E16M8,
+ SF_VC_I_SE_E32MF2,
+ SF_VC_I_SE_E32M1,
+ SF_VC_I_SE_E32M2,
+ SF_VC_I_SE_E32M4,
+ SF_VC_I_SE_E32M8,
+ SF_VC_I_SE_E64M1,
+ SF_VC_I_SE_E64M2,
+ SF_VC_I_SE_E64M4,
+ SF_VC_I_SE_E64M8,
+ SF_VC_XV_SE,
+ SF_VC_IV_SE,
+ SF_VC_VV_SE,
+ SF_VC_FV_SE,
+ SF_VC_XVV_SE,
+ SF_VC_IVV_SE,
+ SF_VC_VVV_SE,
+ SF_VC_FVV_SE,
+ SF_VC_XVW_SE,
+ SF_VC_IVW_SE,
+ SF_VC_VVW_SE,
+ SF_VC_FVW_SE,
+ SF_VC_V_X_SE,
+ SF_VC_V_I_SE,
+ SF_VC_V_XV_SE,
+ SF_VC_V_IV_SE,
+ SF_VC_V_VV_SE,
+ SF_VC_V_FV_SE,
+ SF_VC_V_XVV_SE,
+ SF_VC_V_IVV_SE,
+ SF_VC_V_VVV_SE,
+ SF_VC_V_FVV_SE,
+ SF_VC_V_XVW_SE,
+ SF_VC_V_IVW_SE,
+ SF_VC_V_VVW_SE,
+ SF_VC_V_FVW_SE,
+
// WARNING: Do not add anything in the end unless you want the node to
// have memop! In fact, starting from FIRST_TARGET_MEMORY_OPCODE all
// opcodes will be thought as target memory ops!
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td
index 31f832dfd84cee..d22f98d693b1bf 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td
@@ -221,8 +221,8 @@ let Predicates = [HasVendorXSfvfnrclipxfqf], DecoderNamespace = "XSfvfnrclipxfqf
def VFNRCLIP_XU_F_QF : CustomSiFiveVFNRCLIP<0b100010, OPFVF, "sf.vfnrclip.xu.f.qf">;
def VFNRCLIP_X_F_QF : CustomSiFiveVFNRCLIP<0b100011, OPFVF, "sf.vfnrclip.x.f.qf">;
}
-class VPseudoVC_X<Operand OpClass, DAGOperand RS1Class,
- bit HasSideEffect = 1> :
+
+class VPseudoVC_X<Operand OpClass, DAGOperand RS1Class> :
Pseudo<(outs),
(ins OpClass:$op1, payload5:$rs2, payload5:$rd, RS1Class:$r1,
AVL:$vl, ixlenimm:$sew), []>,
@@ -231,12 +231,11 @@ class VPseudoVC_X<Operand OpClass, DAGOperand RS1Class,
let mayStore = 0;
let HasVLOp = 1;
let HasSEWOp = 1;
- let hasSideEffects = HasSideEffect;
+ let hasSideEffects = 0;
let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
}
-class VPseudoVC_XV<Operand OpClass, VReg RS2Class, DAGOperand RS1Class,
- bit HasSideEffect = 1> :
+class VPseudoVC_XV<Operand OpClass, VReg RS2Class, DAGOperand RS1Class> :
Pseudo<(outs),
(ins OpClass:$op1, payload5:$rd, RS2Class:$rs2, RS1Class:$r1,
AVL:$vl, ixlenimm:$sew), []>,
@@ -245,12 +244,12 @@ class VPseudoVC_XV<Operand OpClass, VReg RS2Class, DAGOperand RS1Class,
let mayStore = 0;
let HasVLOp = 1;
let HasSEWOp = 1;
- let hasSideEffects = HasSideEffect;
+ let hasSideEffects = 0;
let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
}
class VPseudoVC_XVV<Operand OpClass, VReg RDClass, VReg RS2Class,
- DAGOperand RS1Class, bit HasSideEffect = 1> :
+ DAGOperand RS1Class> :
Pseudo<(outs),
(ins OpClass:$op1, RDClass:$rd, RS2Class:$rs2, RS1Class:$r1,
AVL:$vl, ixlenimm:$sew), []>,
@@ -259,12 +258,11 @@ class VPseudoVC_XVV<Operand OpClass, VReg RDClass, VReg RS2Class,
let mayStore = 0;
let HasVLOp = 1;
let HasSEWOp = 1;
- let hasSideEffects = HasSideEffect;
+ let hasSideEffects = 0;
let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
}
-class VPseudoVC_V_X<Operand OpClass, VReg RDClass, DAGOperand RS1Class,
- bit HasSideEffect = 1> :
+class VPseudoVC_V_X<Operand OpClass, VReg RDClass, DAGOperand RS1Class> :
Pseudo<(outs RDClass:$rd),
(ins OpClass:$op1, payload5:$rs2, RS1Class:$r1,
AVL:$vl, ixlenimm:$sew), []>,
@@ -273,12 +271,12 @@ class VPseudoVC_V_X<Operand OpClass, VReg RDClass, DAGOperand RS1Class,
let mayStore = 0;
let HasVLOp = 1;
let HasSEWOp = 1;
- let hasSideEffects = HasSideEffect;
+ let hasSideEffects = 0;
let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
}
class VPseudoVC_V_XV<Operand OpClass, VReg RDClass, VReg RS2Class,
- DAGOperand RS1Class, bit HasSideEffect = 1> :
+ DAGOperand RS1Class> :
Pseudo<(outs RDClass:$rd),
(ins OpClass:$op1, RS2Class:$rs2, RS1Class:$r1,
AVL:$vl, ixlenimm:$sew), []>,
@@ -287,12 +285,12 @@ class VPseudoVC_V_XV<Operand OpClass, VReg RDClass, VReg RS2Class,
let mayStore = 0;
let HasVLOp = 1;
let HasSEWOp = 1;
- let hasSideEffects = HasSideEffect;
+ let hasSideEffects = 0;
let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
}
class VPseudoVC_V_XVV<Operand OpClass, VReg RDClass, VReg RS2Class,
- DAGOperand RS1Class, bit HasSideEffect = 1> :
+ DAGOperand RS1Class> :
Pseudo<(outs RDClass:$rd),
(ins OpClass:$op1, RDClass:$rs3, RS2Class:$rs2, RS1Class:$r1,
AVL:$vl, ixlenimm:$sew), []>,
@@ -301,44 +299,52 @@ class VPseudoVC_V_XVV<Operand OpClass, VReg RDClass, VReg RS2Class,
let mayStore = 0;
let HasVLOp = 1;
let HasSEWOp = 1;
- let hasSideEffects = HasSideEffect;
+ let hasSideEffects = 0;
let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
}
multiclass VPseudoVC_X<LMULInfo m, DAGOperand RS1Class,
Operand OpClass = payload2> {
let VLMul = m.value in {
- def "PseudoVC_" # NAME # "_SE_" # m.MX : VPseudoVC_X<OpClass, RS1Class>;
- def "PseudoVC_V_" # NAME # "_SE_" # m.MX : VPseudoVC_V_X<OpClass, m.vrclass, RS1Class>;
- def "PseudoVC_V_" # NAME # "_" # m.MX : VPseudoVC_V_X<OpClass, m.vrclass, RS1Class, 0>;
+ let Defs = [VCIX_STATE], Uses = [VCIX_STATE] in {
+ def "PseudoVC_" # NAME # "_SE_" # m.MX : VPseudoVC_X<OpClass, RS1Class>;
+ def "PseudoVC_V_" # NAME # "_SE_" # m.MX : VPseudoVC_V_X<OpClass, m.vrclass, RS1Class>;
+ }
+ def "PseudoVC_V_" # NAME # "_" # m.MX : VPseudoVC_V_X<OpClass, m.vrclass, RS1Class>;
}
}
multiclass VPseudoVC_XV<LMULInfo m, DAGOperand RS1Class,
Operand OpClass = payload2> {
let VLMul = m.value in {
- def "PseudoVC_" # NAME # "_SE_" # m.MX : VPseudoVC_XV<OpClass, m.vrclass, RS1Class>;
- def "PseudoVC_V_" # NAME # "_SE_" # m.MX : VPseudoVC_V_XV<OpClass, m.vrclass, m.vrclass, RS1Class>;
- def "PseudoVC_V_" # NAME # "_" # m.MX : VPseudoVC_V_XV<OpClass, m.vrclass, m.vrclass, RS1Class, 0>;
+ let Defs = [VCIX_STATE], Uses = [VCIX_STATE] in {
+ def "PseudoVC_" # NAME # "_SE_" # m.MX : VPseudoVC_XV<OpClass, m.vrclass, RS1Class>;
+ def "PseudoVC_V_" # NAME # "_SE_" # m.MX : VPseudoVC_V_XV<OpClass, m.vrclass, m.vrclass, RS1Class>;
+ }
+ def "PseudoVC_V_" # NAME # "_" # m.MX : VPseudoVC_V_XV<OpClass, m.vrclass, m.vrclass, RS1Class>;
}
}
multiclass VPseudoVC_XVV<LMULInfo m, DAGOperand RS1Class,
Operand OpClass = payload2> {
let VLMul = m.value in {
- def "PseudoVC_" # NAME # "_SE_" # m.MX : VPseudoVC_XVV<OpClass, m.vrclass, m.vrclass, RS1Class>;
- def "PseudoVC_V_" # NAME # "_SE_" # m.MX : VPseudoVC_V_XVV<OpClass, m.vrclass, m.vrclass, RS1Class>;
- def "PseudoVC_V_" # NAME # "_" # m.MX : VPseudoVC_V_XVV<OpClass, m.vrclass, m.vrclass, RS1Class, 0>;
+ let Defs = [VCIX_STATE], Uses = [VCIX_STATE] in {
+ def "PseudoVC_" # NAME # "_SE_" # m.MX : VPseudoVC_XVV<OpClass, m.vrclass, m.vrclass, RS1Class>;
+ def "PseudoVC_V_" # NAME # "_SE_" # m.MX : VPseudoVC_V_XVV<OpClass, m.vrclass, m.vrclass, RS1Class>;
+ }
+ def "PseudoVC_V_" # NAME # "_" # m.MX : VPseudoVC_V_XVV<OpClass, m.vrclass, m.vrclass, RS1Class>;
}
}
multiclass VPseudoVC_XVW<LMULInfo m, DAGOperand RS1Class,
Operand OpClass = payload2> {
let VLMul = m.value in {
+ let Defs = [VCIX_STATE], Uses = [VCIX_STATE] in
def "PseudoVC_" # NAME # "_SE_" # m.MX : VPseudoVC_XVV<OpClass, m.wvrclass, m.vrclass, RS1Class>;
let Constraints = "@earlyclobber $rd, $rd = $rs3" in {
+ let Defs = [VCIX_STATE], Uses = [VCIX_STATE] in
def "PseudoVC_V_" # NAME # "_SE_" # m.MX : VPseudoVC_V_XVV<OpClass, m.wvrclass, m.vrclass, RS1Class>;
- def "PseudoVC_V_" # NAME # "_" # m.MX : VPseudoVC_V_XVV<OpClass, m.wvrclass, m.vrclass, RS1Class, 0>;
+ def "PseudoVC_V_" # NAME # "_" # m.MX : VPseudoVC_V_XVV<OpClass, m.wvrclass, m.vrclass, RS1Class>;
}
}
}
@@ -428,6 +434,149 @@ let Predicates = [HasVendorXSfvfnrclipxfqf] in {
defm VFNRCLIP_X_F_QF : VPseudoSiFiveVFNRCLIP;
}
+// SDNode
+def SDT_SF_VC_X : SDTypeProfile<0, 5, [SDTCisSameAs<0, 1>,
+ SDTCisVT<0, XLenVT>,
+ SDTCisSameAs<0, 2>,
+ SDTCisSameAs<0, 3>,
+ SDTCisSameAs<0, 4>]>;
+
+def SDT_SF_VC_V_X : SDTypeProfile<1, 4, [SDTCisVec<0>,
+ SDTCisVT<1, XLenVT>,
+ SDTCisSameAs<1, 2>,
+ SDTCisSameAs<1, 3>,
+ SDTCisSameAs<1, 4>]>;
+
+def SDT_SF_VC_XV : SDTypeProfile<0, 5, [SDTCisSameAs<0, 1>,
+ SDTCisVec<2>,
+ SDTCisSameAs<0, 4>,
+ SDTCisVT<0, XLenVT>]>;
+
+def SDT_SF_VC_V_XV : SDTypeProfile<1, 4, [SDTCisVec<0>,
+ SDTCisVT<1, XLenVT>,
+ SDTCisSameAs<0, 2>,
+ SDTCisSameAs<1, 4>]>;
+
+def SDT_SF_VC_XVV : SDTypeProfile<0, 5, [SDTCisVT<0, XLenVT>,
+ SDTCisVec<1>,
+ SDTCisSameAs<1, 2>,
+ SDTCisSameAs<0, 4>]>;
+
+def SDT_SF_VC_V_XVV : SDTypeProfile<1, 5, [SDTCisVec<0>,
+ SDTCisVT<1, XLenVT>,
+ SDTCisSameAs<0, 2>,
+ SDTCisSameAs<0, 3>,
+ SDTCisSameAs<1, 5>]>;
+
+def SDT_SF_VC_XVW : SDTypeProfile<0, 5, [SDTCisVT<0, XLenVT>,
+ SDTCisVec<1>, SDTCisVec<2>,
+ SDTCisSameAs<0, 4>]>;
+
+def SDT_SF_VC_V_XVW : SDTypeProfile<1, 5, [SDTCisVec<0>,
+ SDTCisVT<1, XLenVT>,
+ SDTCisSameAs<0, 2>,
+ SDTCisVec<3>,
+ SDTCisSameAs<1, 5>]>;
+
+foreach vti = AllIntegerVectors in {
+ def sf_vc_x_e#vti.SEW#!tolower(vti.LMul.MX) : SDNode<"RISCVISD::SF_VC_X_SE_E"#vti.SEW#vti.LMul.MX, SDT_SF_VC_X, [SDNPHasChain]>;
+ def sf_vc_i_e#vti.SEW#!tolower(vti.LMul.MX) : SDNode<"RISCVISD::SF_VC_I_SE_E"#vti.SEW#vti.LMul.MX, SDT_SF_VC_X, [SDNPHasChain]>;
+}
+def sf_vc_v_x_se : SDNode<"RISCVISD::SF_VC_V_X_SE", SDT_SF_VC_V_X, [SDNPHasChain]>;
+def sf_vc_v_i_se : SDNode<"RISCVISD::SF_VC_V_I_SE", SDT_SF_VC_V_X, [SDNPHasChain]>;
+def sf_vc_vv_se : SDNode<"RISCVISD::SF_VC_VV_SE", SDT_SF_VC_XV, [SDNPHasChain]>;
+def sf_vc_xv_se : SDNode<"RISCVISD::SF_VC_XV_SE", SDT_SF_VC_XV, [SDNPHasChain]>;
+def sf_vc_iv_se : SDNode<"RISCVISD::SF_VC_IV_SE", SDT_SF_VC_XV, [SDNPHasChain]>;
+def sf_vc_fv_se : SDNode<"RISCVISD::SF_VC_FV_SE", SDT_SF_VC_XV, [SDNPHasChain]>;
+def sf_vc_v_vv_se : SDNode<"RISCVISD::SF_VC_V_VV_SE", SDT_SF_VC_V_XV, [SDNPHasChain]>;
+def sf_vc_v_xv_se : SDNode<"RISCVISD::SF_VC_V_XV_SE", SDT_SF_VC_V_XV, [SDNPHasChain]>;
+def sf_vc_v_iv_se : SDNode<"RISCVISD::SF_VC_V_IV_SE", SDT_SF_VC_V_XV, [SDNPHasChain]>;
+def sf_vc_v_fv_se : SDNode<"RISCVISD::SF_VC_V_FV_SE", SDT_SF_VC_V_XV, [SDNPHasChain]>;
+def sf_vc_vvv_se : SDNode<"RISCVISD::SF_VC_VVV_SE", SDT_SF_VC_XVV, [SDNPHasChain]>;
+def sf_vc_xvv_se : SDNode<"RISCVISD::SF_VC_XVV_SE", SDT_SF_VC_XVV, [SDNPHasChain]>;
+def sf_vc_ivv_se : SDNode<"RISCVISD::SF_VC_IVV_SE", SDT_SF_VC_XVV, [SDNPHasChain]>;
+def sf_vc_fvv_se : SDNode<"RISCVISD::SF_VC_FVV_SE", SDT_SF_VC_XVV, [SDNPHasChain]>;
+def sf_vc_v_vvv_se : SDNode<"RISCVISD::SF_VC_V_VVV_SE", SDT_SF_VC_V_XVV, [SDNPHasChain]>;
+def sf_vc_v_xvv_se : SDNode<"RISCVISD::SF_VC_V_XVV_SE", SDT_SF_VC_V_XVV, [SDNPHasChain]>;
+def sf_vc_v_ivv_se : SDNode<"RISCVISD::SF_VC_V_IVV_SE", SDT_SF_VC_V_XVV, [SDNPHasChain]>;
+def sf_vc_v_fvv_se : SDNode<"RISCVISD::SF_VC_V_FVV_SE", SDT_SF_VC_V_XVV, [SDNPHasChain]>;
+def sf_vc_vvw_se : SDNode<"RISCVISD::SF_VC_VVW_SE", SDT_SF_VC_XVW, [SDNPHasChain]>;
+def sf_vc_xvw_se : SDNode<"RISCVISD::SF_VC_XVW_SE", SDT_SF_VC_XVW, [SDNPHasChain]>;
+def sf_vc_ivw_se : SDNode<"RISCVISD::SF_VC_IVW_SE", SDT_SF_VC_XVW, [SDNPHasChain]>;
+def sf_vc_fvw_se : SDNode<"RISCVISD::SF_VC_FVW_SE", SDT_SF_VC_XVW, [SDNPHasChain]>;
+def sf_vc_v_vvw_se : SDNode<"RISCVISD::SF_VC_V_VVW_SE", SDT_SF_VC_V_XVW, [SDNPHasChain]>;
+def sf_vc_v_xvw_se : SDNode<"RISCVISD::SF_VC_V_XVW_SE", SDT_SF_VC_V_XVW, [SDNPHasChain]>;
+def sf_vc_v_ivw_se : SDNode<"RISCVISD::SF_VC_V_IVW_SE", SDT_SF_VC_V_XVW, [SDNPHasChain]>;
+def sf_vc_v_fvw_se : SDNode<"RISCVISD::SF_VC_V_FVW_SE", SDT_SF_VC_V_XVW, [SDNPHasChain]>;
+
+class VPatVC_OP4_ISD<SDPatternOperator op,
+ string inst,
+ ValueType op2_type,
+ ValueType op3_type,
+ ValueType op4_type,
+ int sew,
+ DAGOperand op2_kind,
+ DAGOperand op3_kind,
+ DAGOperand op4_kind,
+ Operand op1_kind = payload2> :
+ Pat<(op
+ (XLenVT op1_kind:$op1),
+ (op2_type op2_kind:$op2),
+ (op3_type op3_kind:$op3),
+ (op4_type op4_kind:$op4),
+ VLOpFrag),
+ (!cast<Instruction>(inst)
+ (XLenVT op1_kind:$op1),
+ (op2_type op2_kind:$op2),
+ (op3_type op3_kind:$op3),
+ (op4_type op4_kind:$op4),
+ GPR:$vl, sew)>;
+
+class VPatVC_V_OP4_ISD<SDPatternOperator op,
+ string inst,
+ ValueType result_type,
+ ValueType op2_type,
+ ValueType op3_type,
+ ValueType op4_type,
+ int sew,
+ DAGOperand op2_kind,
+ DAGOperand op3_kind,
+ DAGOperand op4_kind,
+ Operand op1_kind = payload2> :
+ Pat<(result_type (op
+ (XLenVT op1_kind:$op1),
+ (op2_type op2_kind:$op2),
+ (op3_type op3_kind:$op3),
+ (op4_type op4_kind:$op4),
+ VLOpFrag)),
+ (!cast<Instruction>(inst)
+ (XLenVT op1_kind:$op1),
+ (op2_type op2_kind:$op2),
+ (op3_type op3_kind:$op3),
+ (op4_type op4_kind:$op4),
+ GPR:$vl, sew)>;
+
+
+class VPatVC_V_OP3_ISD<SDPatternOperator op,
+ string inst,
+ ValueType result_type,
+ ValueType op2_type,
+ ValueType op3_type,
+ int sew,
+ DAGOperand op2_kind,
+ DAGOperand op3_kind,
+ Operand op1_kind = payload2> :
+ Pat<(result_type (op
+ (XLenVT op1_kind:$op1),
+ (op2_type op2_kind:$op2),
+ (op3_type op3_kind:$op3),
+ VLOpFrag)),
+ (!cast<Instruction>(inst)
+ (XLenVT op1_kind:$op1),
+ (op2_type op2_kind:$op2),
+ (op3_type op3_kind:$op3),
+ GPR:$vl, sew)>;
+
class VPatVC_OP4<string intrinsic_name,
string inst,
ValueType op2_type,
@@ -497,14 +646,14 @@ class VPatVC_V_OP3<string intrinsic_name,
multiclass VPatVC_X<string intrinsic_suffix, string instruction_suffix,
VTypeInfo vti, ValueType type, DAGOperand kind> {
- def : VPatVC_OP4<"int_riscv_sf_vc_" # intrinsic_suffix # "_se_e" # vti.SEW # !tolower(vti.LMul.MX),
- "PseudoVC_" # instruction_suffix # "_SE_" # vti.LMul.MX,
- XLenVT, XLenVT, type, vti.Log2SEW,
- payload5, payload5, kind>;
- def : VPatVC_V_OP3<"int_riscv_sf_vc_v_" # intrinsic_suffix # "_se",
- "PseudoVC_V_" # instruction_suffix # "_SE_" # vti.LMul.MX,
- vti.Vector, XLenVT, type, vti.Log2SEW,
- payload5, kind>;
+ def : VPatVC_OP4_ISD<!cast<SDPatternOperator>("sf_vc_" # intrinsic_suffix # "_e" # vti.SEW # !tolower(vti.LMul.MX)),
+ "PseudoVC_" # instruction_suffix # "_SE_" # vti.LMul.MX,
+ XLenVT, XLenVT, type, vti.Log2SEW,
+ payload5, payload5, kind>;
+ def : VPatVC_V_OP3_ISD<!cast<SDPatternOperator>("sf_vc_v_" # intrinsic_suffix # "_se"),
+ "PseudoVC_V_" # instruction_suffix # "_SE_" # vti.LMul.MX,
+ vti.Vector, XLenVT, type, vti.Log2SEW,
+ payload5, kind>;
def : VPatVC_V_OP3<"int_riscv_sf_vc_v_" # intrinsic_suffix,
"PseudoVC_V_" # instruction_suffix # "_" # vti.LMul.MX,
vti.Vector, XLenVT, type, vti.Log2SEW,
@@ -514,14 +663,14 @@ multiclass VPatVC_X<string intrinsic_suffix, string instruction_suffix,
multiclass VPatVC_XV<string intrinsic_suffix, string instruction_suffix,
VTypeInfo vti, ValueType type, DAGOperand kind,
Operand op1_kind = payload2> {
- def : VPatVC_OP4<"int_riscv_sf_vc_" # intrinsic_suffix # "_se",
+ def : VPatVC_OP4_ISD<!cast<SDPatternOperator>("sf_vc_" # intrinsic_suffix # "_se"),
"PseudoVC_" # instruction_suffix # "_SE_" # vti.LMul.MX,
XLenVT, vti.Vector, type, vti.Log2SEW,
payload5, vti.RegClass, kind, op1_kind>;
- def : VPatVC_V_OP3<"int_riscv_sf_vc_v_" # intrinsic_suffix # "_se",
- "PseudoVC_V_" # instruction_suffix # "_SE_" # vti.LMul.MX,
- vti.Vector, vti.Vector, type, vti.Log2SEW,
- vti.RegClass, kind, op1_kind>;
+ def : VPatVC_V_OP3_ISD<!cast<SDPatternOperator>("sf_vc_v_" # intrinsic_suffix # "_se"),
+ "PseudoVC_V_" # instruction_suffix # "_SE_" # vti.LMul.MX,
+ vti.Vector, vti.Vector, type, vti.Log2SEW,
+ vti.RegClass, kind, op1_kind>;
def : VPatVC_V_OP3<"int_riscv_sf_vc_v_" # intrinsic_suffix,
"PseudoVC_V_" # instruction_suffix # "_" # vti.LMul.MX,
vti.Vector, vti.Vector, type, vti.Log2SEW,
@@ -531,11 +680,11 @@ multiclass VPatVC_XV<string intrinsic_suffix, string instruction_suffix,
multiclass VPatVC_XVV<string intrinsic_suffix, string instruction_suffix,
VTypeInfo wti, VTypeInfo vti, ValueType type, DAGOperand kind,
Operand op1_kind = payload2> {
- def : VPatVC_OP4<"int_riscv_sf_vc_" # intrinsic_suffix # "_se",
+ def : VPatVC_OP4_ISD<!cast<SDPatternOperator>("sf_vc_" # intrinsic_suffix # "_se"),
"PseudoVC_" # instruction_suffix # "_SE_" # vti.LMul.MX,
wti.Vector, vti.Vector, type, vti.Log2SEW,
wti.RegClass, vti.RegClass, kind, op1_kind>;
- def : VPatVC_V_OP4<"int_riscv_sf_vc_v_" # intrinsic_suffix # "_se",
+ def : VPatVC_V_OP4_ISD<!cast<SDPatternOperator>("sf_vc_v_" # intrinsic_suffix # "_se"),
"PseudoVC_V_" # instruction_suffix # "_SE_" # vti.LMul.MX,
wti.Vector, wti.Vector, vti.Vector, type, vti.Log2SEW,
wti.RegClass, vti.RegClass, kind, op1_kind>;
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
index 24f8d600f1eafc..889fab494a53af 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
@@ -120,6 +120,9 @@ BitVector RISCVRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
markSuperRegs(Reserved, RISCV::FRM);
markSuperRegs(Reserved, RISCV::FFLAGS);
+ // SiFive VCIX state registers.
+ markSuperRegs(Reserved, RISCV::VCIX_STATE);
+
if (MF.getFunction().getCallingConv() == CallingConv::GRAAL) {
if (Subtarget.isRVE())
report_fatal_error("Graal reserved registers do not exist in RVE");
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
index 840fd149d68198..44509039956c21 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
@@ -598,3 +598,6 @@ def FRM : RISCVReg<0, "frm">;
// Shadow Stack register
def SSP : RISCVReg<0, "ssp">;
+
+// Dummy VCIX state register
+def VCIX_STATE : RISCVReg<0, "vcix_state">;
diff --git a/llvm/test/CodeGen/RISCV/pr69586.ll b/llvm/test/CodeGen/RISCV/pr69586.ll
index ef91334c5ff004..2d5fce2ca4970e 100644
--- a/llvm/test/CodeGen/RISCV/pr69586.ll
+++ b/llvm/test/CodeGen/RISCV/pr69586.ll
@@ -7,21 +7,21 @@
define void @test(ptr %0, ptr %1, i64 %2) {
; NOREMAT-LABEL: test:
; NOREMAT: # %bb.0:
-; NOREMAT-NEXT: addi sp, sp, -368
-; NOREMAT-NEXT: .cfi_def_cfa_offset 368
-; NOREMAT-NEXT: sd ra, 360(sp) # 8-byte Folded Spill
-; NOREMAT-NEXT: sd s0, 352(sp) # 8-byte Folded Spill
-; NOREMAT-NEXT: sd s1, 344(sp) # 8-byte Folded Spill
-; NOREMAT-NEXT: sd s2, 336(sp) # 8-byte Folded Spill
-; NOREMAT-NEXT: sd s3, 328(sp) # 8-byte Folded Spill
-; NOREMAT-NEXT: sd s4, 320(sp) # 8-byte Folded Spill
-; NOREMAT-NEXT: sd s5, 312(sp) # 8-byte Folded Spill
-; NOREMAT-NEXT: sd s6, 304(sp) # 8-byte Folded Spill
-; NOREMAT-NEXT: sd s7, 296(sp) # 8-byte Folded Spill
-; NOREMAT-NEXT: sd s8, 288(sp) # 8-byte Folded Spill
-; NOREMAT-NEXT: sd s9, 280(sp) # 8-byte Folded Spill
-; NOREMAT-NEXT: sd s10, 272(sp) # 8-byte Folded Spill
-; NOREMAT-NEXT: sd s11, 264(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT: addi sp, sp, -400
+; NOREMAT-NEXT: .cfi_def_cfa_offset 400
+; NOREMAT-NEXT: sd ra, 392(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT: sd s0, 384(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT: sd s1, 376(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT: sd s2, 368(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT: sd s3, 360(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT: sd s4, 352(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT: sd s5, 344(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT: sd s6, 336(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT: sd s7, 328(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT: sd s8, 320(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT: sd s9, 312(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT: sd s10, 304(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT: sd s11, 296(sp) # 8-byte Folded Spill
; NOREMAT-NEXT: .cfi_offset ra, -8
; NOREMAT-NEXT: .cfi_offset s0, -16
; NOREMAT-NEXT: .cfi_offset s1, -24
@@ -35,6 +35,11 @@ define void @test(ptr %0, ptr %1, i64 %2) {
; NOREMAT-NEXT: .cfi_offset s9, -88
; NOREMAT-NEXT: .cfi_offset s10, -96
; NOREMAT-NEXT: .cfi_offset s11, -104
+; NOREMAT-NEXT: csrr a2, vlenb
+; NOREMAT-NEXT: li a3, 6
+; NOREMAT-NEXT: mul a2, a2, a3
+; NOREMAT-NEXT: sub sp, sp, a2
+; NOREMAT-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x90, 0x03, 0x22, 0x11, 0x06, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 400 + 6 * vlenb
; NOREMAT-NEXT: li a2, 32
; NOREMAT-NEXT: vsetvli zero, a2, e32, m2, ta, ma
; NOREMAT-NEXT: vle32.v v8, (a0)
@@ -50,670 +55,728 @@ define void @test(ptr %0, ptr %1, i64 %2) {
; NOREMAT-NEXT: vle32.v v10, (a2)
; NOREMAT-NEXT: li a2, 1
; NOREMAT-NEXT: slli a2, a2, 11
-; NOREMAT-NEXT: sd a2, 256(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT: sd a2, 272(sp) # 8-byte Folded Spill
; NOREMAT-NEXT: add a2, a0, a2
; NOREMAT-NEXT: vle32.v v12, (a2)
; NOREMAT-NEXT: sf.vc.vv 3, 0, v8, v14
; NOREMAT-NEXT: vle32.v v8, (a2)
-; NOREMAT-NEXT: li a4, 5
-; NOREMAT-NEXT: slli a2, a4, 9
-; NOREMAT-NEXT: sd a2, 248(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT: li a5, 5
+; NOREMAT-NEXT: slli a2, a5, 9
+; NOREMAT-NEXT: sd a2, 264(sp) # 8-byte Folded Spill
; NOREMAT-NEXT: add a2, a0, a2
; NOREMAT-NEXT: vle32.v v14, (a2)
; NOREMAT-NEXT: sf.vc.vv 3, 0, v10, v12
; NOREMAT-NEXT: vle32.v v10, (a2)
-; NOREMAT-NEXT: li a5, 3
-; NOREMAT-NEXT: slli a2, a5, 10
-; NOREMAT-NEXT: sd a2, 240(sp) # 8-byte Folded Spill
-; NOREMAT-NEXT: add a2, a0, a2
-; NOREMAT-NEXT: vle32.v v12, (a2)
+; NOREMAT-NEXT: li a2, 3
+; NOREMAT-NEXT: slli a3, a2, 10
+; NOREMAT-NEXT: sd a3, 256(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT: add a3, a0, a3
+; NOREMAT-NEXT: vle32.v v12, (a3)
; NOREMAT-NEXT: sf.vc.vv 3, 0, v8, v14
-; NOREMAT-NEXT: vle32.v v8, (a2)
-; NOREMAT-NEXT: li a3, 7
-; NOREMAT-NEXT: slli a2, a3, 9
-; NOREMAT-NEXT: sd a2, 232(sp) # 8-byte Folded Spill
-; NOREMAT-NEXT: add a2, a0, a2
-; NOREMAT-NEXT: vle32.v v14, (a2)
+; NOREMAT-NEXT: vle32.v v8, (a3)
+; NOREMAT-NEXT: li a4, 7
+; NOREMAT-NEXT: slli a3, a4, 9
+; NOREMAT-NEXT: sd a3, 248(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT: add a3, a0, a3
+; NOREMAT-NEXT: vle32.v v14, (a3)
; NOREMAT-NEXT: sf.vc.vv 3, 0, v10, v12
-; NOREMAT-NEXT: vle32.v v10, (a2)
-; NOREMAT-NEXT: lui a2, 1
-; NOREMAT-NEXT: add a2, a0, a2
-; NOREMAT-NEXT: vle32.v v12, (a2)
+; NOREMAT-NEXT: vle32.v v10, (a3)
+; NOREMAT-NEXT: lui a3, 1
+; NOREMAT-NEXT: add a3, a0, a3
+; NOREMAT-NEXT: vle32.v v12, (a3)
; NOREMAT-NEXT: sf.vc.vv 3, 0, v8, v14
-; NOREMAT-NEXT: vle32.v v8, (a2)
-; NOREMAT-NEXT: li a2, 9
-; NOREMAT-NEXT: slli a6, a2, 9
-; NOREMAT-NEXT: sd a6, 224(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT: vle32.v v8, (a3)
+; NOREMAT-NEXT: li a3, 9
+; NOREMAT-NEXT: slli a6, a3, 9
+; NOREMAT-NEXT: sd a6, 240(sp) # 8-byte Folded Spill
; NOREMAT-NEXT: add a6, a0, a6
; NOREMAT-NEXT: vle32.v v14, (a6)
; NOREMAT-NEXT: sf.vc.vv 3, 0, v10, v12
; NOREMAT-NEXT: vle32.v v10, (a6)
-; NOREMAT-NEXT: slli a6, a4, 10
-; NOREMAT-NEXT: sd a6, 216(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT: slli a6, a5, 10
+; NOREMAT-NEXT: sd a6, 232(sp) # 8-byte Folded Spill
; NOREMAT-NEXT: add a6, a0, a6
; NOREMAT-NEXT: vle32.v v12, (a6)
; NOREMAT-NEXT: sf.vc.vv 3, 0, v8, v14
; NOREMAT-NEXT: vle32.v v8, (a6)
; NOREMAT-NEXT: li s8, 11
; NOREMAT-NEXT: slli a6, s8, 9
-; NOREMAT-NEXT: sd a6, 208(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT: sd a6, 224(sp) # 8-byte Folded Spill
; NOREMAT-NEXT: add a6, a0, a6
; NOREMAT-NEXT: vle32.v v14, (a6)
; NOREMAT-NEXT: sf.vc.vv 3, 0, v10, v12
; NOREMAT-NEXT: vle32.v v10, (a6)
-; NOREMAT-NEXT: slli a5, a5, 11
-; NOREMAT-NEXT: sd a5, 200(sp) # 8-byte Folded Spill
-; NOREMAT-NEXT: add a5, a0, a5
-; NOREMAT-NEXT: vle32.v v12, (a5)
+; NOREMAT-NEXT: slli a2, a2, 11
+; NOREMAT-NEXT: sd a2, 216(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT: add a2, a0, a2
+; NOREMAT-NEXT: vle32.v v12, (a2)
; NOREMAT-NEXT: sf.vc.vv 3, 0, v8, v14
-; NOREMAT-NEXT: vle32.v v8, (a5)
+; NOREMAT-NEXT: vle32.v v8, (a2)
; NOREMAT-NEXT: li s2, 13
-; NOREMAT-NEXT: slli a5, s2, 9
-; NOREMAT-NEXT: sd a5, 192(sp) # 8-byte Folded Spill
-; NOREMAT-NEXT: add a5, a0, a5
-; NOREMAT-NEXT: vle32.v v14, (a5)
+; NOREMAT-NEXT: slli a2, s2, 9
+; NOREMAT-NEXT: sd a2, 208(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT: add a2, a0, a2
+; NOREMAT-NEXT: vle32.v v14, (a2)
; NOREMAT-NEXT: sf.vc.vv 3, 0, v10, v12
-; NOREMAT-NEXT: vle32.v v10, (a5)
-; NOREMAT-NEXT: slli a5, a3, 10
-; NOREMAT-NEXT: sd a5, 184(sp) # 8-byte Folded Spill
-; NOREMAT-NEXT: add a5, a0, a5
-; NOREMAT-NEXT: vle32.v v12, (a5)
+; NOREMAT-NEXT: vle32.v v10, (a2)
+; NOREMAT-NEXT: slli a2, a4, 10
+; NOREMAT-NEXT: sd a2, 200(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT: add a2, a0, a2
+; NOREMAT-NEXT: vle32.v v12, (a2)
; NOREMAT-NEXT: sf.vc.vv 3, 0, v8, v14
-; NOREMAT-NEXT: vle32.v v8, (a5)
-; NOREMAT-NEXT: li t0, 15
-; NOREMAT-NEXT: slli a5, t0, 9
-; NOREMAT-NEXT: sd a5, 176(sp) # 8-byte Folded Spill
-; NOREMAT-NEXT: add a5, a0, a5
-; NOREMAT-NEXT: vle32.v v14, (a5)
+; NOREMAT-NEXT: vle32.v v8, (a2)
+; NOREMAT-NEXT: li a2, 15
+; NOREMAT-NEXT: slli a6, a2, 9
+; NOREMAT-NEXT: sd a6, 192(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT: add a6, a0, a6
+; NOREMAT-NEXT: vle32.v v26, (a6)
; NOREMAT-NEXT: sf.vc.vv 3, 0, v10, v12
-; NOREMAT-NEXT: vle32.v v10, (a5)
-; NOREMAT-NEXT: lui a5, 2
+; NOREMAT-NEXT: vle32.v v16, (a6)
+; NOREMAT-NEXT: lui a6, 2
+; NOREMAT-NEXT: add a6, a0, a6
+; NOREMAT-NEXT: vle32.v v28, (a6)
+; NOREMAT-NEXT: vle32.v v10, (a6)
+; NOREMAT-NEXT: li a6, 17
+; NOREMAT-NEXT: slli a6, a6, 9
+; NOREMAT-NEXT: sd a6, 184(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT: li t0, 17
+; NOREMAT-NEXT: add a6, a0, a6
+; NOREMAT-NEXT: vle32.v v30, (a6)
+; NOREMAT-NEXT: vle32.v v18, (a6)
+; NOREMAT-NEXT: slli a6, a3, 10
+; NOREMAT-NEXT: sd a6, 176(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT: add a6, a0, a6
+; NOREMAT-NEXT: vle32.v v0, (a6)
+; NOREMAT-NEXT: vle32.v v20, (a6)
+; NOREMAT-NEXT: li a6, 19
+; NOREMAT-NEXT: slli a6, a6, 9
+; NOREMAT-NEXT: sd a6, 168(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT: li a7, 19
+; NOREMAT-NEXT: add a6, a0, a6
+; NOREMAT-NEXT: vle32.v v2, (a6)
+; NOREMAT-NEXT: vle32.v v22, (a6)
+; NOREMAT-NEXT: slli a5, a5, 11
+; NOREMAT-NEXT: sd a5, 160(sp) # 8-byte Folded Spill
; NOREMAT-NEXT: add a5, a0, a5
+; NOREMAT-NEXT: vle32.v v4, (a5)
; NOREMAT-NEXT: vle32.v v12, (a5)
-; NOREMAT-NEXT: sf.vc.vv 3, 0, v8, v14
-; NOREMAT-NEXT: vle32.v v8, (a5)
-; NOREMAT-NEXT: li a5, 17
-; NOREMAT-NEXT: slli a5, a5, 9
-; NOREMAT-NEXT: sd a5, 168(sp) # 8-byte Folded Spill
-; NOREMAT-NEXT: li a7, 17
+; NOREMAT-NEXT: li s10, 21
+; NOREMAT-NEXT: slli a5, s10, 9
+; NOREMAT-NEXT: sd a5, 152(sp) # 8-byte Folded Spill
; NOREMAT-NEXT: add a5, a0, a5
+; NOREMAT-NEXT: vle32.v v24, (a5)
; NOREMAT-NEXT: vle32.v v14, (a5)
-; NOREMAT-NEXT: sf.vc.vv 3, 0, v10, v12
-; NOREMAT-NEXT: vle32.v v10, (a5)
-; NOREMAT-NEXT: slli a5, a2, 10
-; NOREMAT-NEXT: sd a5, 160(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT: sf.vc.vv 3, 0, v8, v26
+; NOREMAT-NEXT: slli a5, s8, 10
+; NOREMAT-NEXT: sd a5, 144(sp) # 8-byte Folded Spill
; NOREMAT-NEXT: add a5, a0, a5
-; NOREMAT-NEXT: vle32.v v12, (a5)
-; NOREMAT-NEXT: sf.vc.vv 3, 0, v8, v14
+; NOREMAT-NEXT: vle32.v v26, (a5)
; NOREMAT-NEXT: vle32.v v8, (a5)
-; NOREMAT-NEXT: li a5, 19
-; NOREMAT-NEXT: slli a5, a5, 9
-; NOREMAT-NEXT: sd a5, 152(sp) # 8-byte Folded Spill
-; NOREMAT-NEXT: li a6, 19
+; NOREMAT-NEXT: sf.vc.vv 3, 0, v16, v28
+; NOREMAT-NEXT: li s6, 23
+; NOREMAT-NEXT: slli a5, s6, 9
+; NOREMAT-NEXT: sd a5, 136(sp) # 8-byte Folded Spill
; NOREMAT-NEXT: add a5, a0, a5
-; NOREMAT-NEXT: vle32.v v14, (a5)
-; NOREMAT-NEXT: sf.vc.vv 3, 0, v10, v12
+; NOREMAT-NEXT: vle32.v v28, (a5)
+; NOREMAT-NEXT: vle32.v v16, (a5)
+; NOREMAT-NEXT: sf.vc.vv 3, 0, v10, v30
+; NOREMAT-NEXT: lui a5, 3
+; NOREMAT-NEXT: add a5, a0, a5
+; NOREMAT-NEXT: vle32.v v30, (a5)
; NOREMAT-NEXT: vle32.v v10, (a5)
+; NOREMAT-NEXT: sf.vc.vv 3, 0, v18, v0
+; NOREMAT-NEXT: li s3, 25
+; NOREMAT-NEXT: slli a5, s3, 9
+; NOREMAT-NEXT: sd a5, 128(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT: add a5, a0, a5
+; NOREMAT-NEXT: vle32.v v0, (a5)
+; NOREMAT-NEXT: vle32.v v18, (a5)
+; NOREMAT-NEXT: sf.vc.vv 3, 0, v20, v2
+; NOREMAT-NEXT: slli a5, s2, 10
+; NOREMAT-NEXT: sd a5, 120(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT: add a5, a0, a5
+; NOREMAT-NEXT: vle32.v v2, (a5)
+; NOREMAT-NEXT: vle32.v v20, (a5)
+; NOREMAT-NEXT: sf.vc.vv 3, 0, v22, v4
+; NOREMAT-NEXT: li t5, 27
+; NOREMAT-NEXT: slli a5, t5, 9
+; NOREMAT-NEXT: sd a5, 112(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT: add a5, a0, a5
+; NOREMAT-NEXT: vle32.v v4, (a5)
+; NOREMAT-NEXT: vle32.v v22, (a5)
+; NOREMAT-NEXT: sf.vc.vv 3, 0, v12, v24
; NOREMAT-NEXT: slli a4, a4, 11
-; NOREMAT-NEXT: sd a4, 144(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT: sd a4, 104(sp) # 8-byte Folded Spill
; NOREMAT-NEXT: add a4, a0, a4
+; NOREMAT-NEXT: vle32.v v24, (a4)
; NOREMAT-NEXT: vle32.v v12, (a4)
-; NOREMAT-NEXT: sf.vc.vv 3, 0, v8, v14
-; NOREMAT-NEXT: vle32.v v8, (a4)
-; NOREMAT-NEXT: li s10, 21
-; NOREMAT-NEXT: slli a4, s10, 9
-; NOREMAT-NEXT: sd a4, 136(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT: sf.vc.vv 3, 0, v14, v26
+; NOREMAT-NEXT: li t2, 29
+; NOREMAT-NEXT: slli a4, t2, 9
+; NOREMAT-NEXT: sd a4, 96(sp) # 8-byte Folded Spill
; NOREMAT-NEXT: add a4, a0, a4
+; NOREMAT-NEXT: vle32.v v26, (a4)
; NOREMAT-NEXT: vle32.v v14, (a4)
-; NOREMAT-NEXT: sf.vc.vv 3, 0, v10, v12
-; NOREMAT-NEXT: vle32.v v10, (a4)
-; NOREMAT-NEXT: slli a4, s8, 10
-; NOREMAT-NEXT: sd a4, 128(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT: sf.vc.vv 3, 0, v8, v28
+; NOREMAT-NEXT: slli a4, a2, 10
+; NOREMAT-NEXT: sd a4, 88(sp) # 8-byte Folded Spill
; NOREMAT-NEXT: add a4, a0, a4
-; NOREMAT-NEXT: vle32.v v12, (a4)
-; NOREMAT-NEXT: sf.vc.vv 3, 0, v8, v14
+; NOREMAT-NEXT: vle32.v v28, (a4)
; NOREMAT-NEXT: vle32.v v8, (a4)
-; NOREMAT-NEXT: li s6, 23
-; NOREMAT-NEXT: slli a4, s6, 9
-; NOREMAT-NEXT: sd a4, 120(sp) # 8-byte Folded Spill
-; NOREMAT-NEXT: add a4, a0, a4
-; NOREMAT-NEXT: vle32.v v14, (a4)
-; NOREMAT-NEXT: sf.vc.vv 3, 0, v10, v12
-; NOREMAT-NEXT: vle32.v v10, (a4)
-; NOREMAT-NEXT: lui a4, 3
+; NOREMAT-NEXT: csrr a4, vlenb
+; NOREMAT-NEXT: slli a4, a4, 2
+; NOREMAT-NEXT: add a4, sp, a4
+; NOREMAT-NEXT: addi a4, a4, 288
+; NOREMAT-NEXT: vs2r.v v8, (a4) # Unknown-size Folded Spill
+; NOREMAT-NEXT: sf.vc.vv 3, 0, v16, v30
+; NOREMAT-NEXT: li a5, 31
+; NOREMAT-NEXT: slli a4, a5, 9
+; NOREMAT-NEXT: sd a4, 80(sp) # 8-byte Folded Spill
; NOREMAT-NEXT: add a4, a0, a4
-; NOREMAT-NEXT: vle32.v v12, (a4)
-; NOREMAT-NEXT: sf.vc.vv 3, 0, v8, v14
+; NOREMAT-NEXT: vle32.v v30, (a4)
+; NOREMAT-NEXT: vle32.v v16, (a4)
+; NOREMAT-NEXT: sf.vc.vv 3, 0, v10, v0
+; NOREMAT-NEXT: lui a6, 4
+; NOREMAT-NEXT: add a4, a0, a6
+; NOREMAT-NEXT: vle32.v v0, (a4)
; NOREMAT-NEXT: vle32.v v8, (a4)
-; NOREMAT-NEXT: li s3, 25
-; NOREMAT-NEXT: slli a4, s3, 9
-; NOREMAT-NEXT: sd a4, 112(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT: csrr a4, vlenb
+; NOREMAT-NEXT: slli a4, a4, 1
+; NOREMAT-NEXT: add a4, sp, a4
+; NOREMAT-NEXT: addi a4, a4, 288
+; NOREMAT-NEXT: vs2r.v v8, (a4) # Unknown-size Folded Spill
+; NOREMAT-NEXT: sf.vc.vv 3, 0, v18, v2
+; NOREMAT-NEXT: addiw a4, a6, 512
+; NOREMAT-NEXT: sd a4, 72(sp) # 8-byte Folded Spill
; NOREMAT-NEXT: add a4, a0, a4
-; NOREMAT-NEXT: vle32.v v14, (a4)
-; NOREMAT-NEXT: sf.vc.vv 3, 0, v10, v12
-; NOREMAT-NEXT: vle32.v v10, (a4)
-; NOREMAT-NEXT: slli a4, s2, 10
-; NOREMAT-NEXT: sd a4, 104(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT: vle32.v v2, (a4)
+; NOREMAT-NEXT: vle32.v v18, (a4)
+; NOREMAT-NEXT: sf.vc.vv 3, 0, v20, v4
+; NOREMAT-NEXT: slli a4, t0, 10
+; NOREMAT-NEXT: sd a4, 64(sp) # 8-byte Folded Spill
; NOREMAT-NEXT: add a4, a0, a4
-; NOREMAT-NEXT: vle32.v v12, (a4)
-; NOREMAT-NEXT: sf.vc.vv 3, 0, v8, v14
-; NOREMAT-NEXT: vle32.v v8, (a4)
-; NOREMAT-NEXT: li t5, 27
-; NOREMAT-NEXT: slli a4, t5, 9
-; NOREMAT-NEXT: sd a4, 96(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT: vle32.v v4, (a4)
+; NOREMAT-NEXT: vle32.v v20, (a4)
+; NOREMAT-NEXT: sf.vc.vv 3, 0, v22, v24
+; NOREMAT-NEXT: addiw a4, a6, 1536
+; NOREMAT-NEXT: sd a4, 56(sp) # 8-byte Folded Spill
; NOREMAT-NEXT: add a4, a0, a4
-; NOREMAT-NEXT: vle32.v v14, (a4)
-; NOREMAT-NEXT: sf.vc.vv 3, 0, v10, v12
-; NOREMAT-NEXT: vle32.v v10, (a4)
+; NOREMAT-NEXT: vle32.v v6, (a4)
+; NOREMAT-NEXT: vle32.v v22, (a4)
+; NOREMAT-NEXT: sf.vc.vv 3, 0, v12, v26
; NOREMAT-NEXT: slli a3, a3, 11
-; NOREMAT-NEXT: sd a3, 88(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT: sd a3, 48(sp) # 8-byte Folded Spill
; NOREMAT-NEXT: add a3, a0, a3
; NOREMAT-NEXT: vle32.v v12, (a3)
-; NOREMAT-NEXT: sf.vc.vv 3, 0, v8, v14
; NOREMAT-NEXT: vle32.v v8, (a3)
-; NOREMAT-NEXT: li t2, 29
-; NOREMAT-NEXT: slli a3, t2, 9
-; NOREMAT-NEXT: sd a3, 80(sp) # 8-byte Folded Spill
-; NOREMAT-NEXT: add a3, a0, a3
-; NOREMAT-NEXT: vle32.v v14, (a3)
-; NOREMAT-NEXT: sf.vc.vv 3, 0, v10, v12
-; NOREMAT-NEXT: vle32.v v10, (a3)
-; NOREMAT-NEXT: slli a3, t0, 10
-; NOREMAT-NEXT: sd a3, 72(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT: addi a3, sp, 288
+; NOREMAT-NEXT: vs2r.v v8, (a3) # Unknown-size Folded Spill
+; NOREMAT-NEXT: sf.vc.vv 3, 0, v14, v28
+; NOREMAT-NEXT: lui s1, 5
+; NOREMAT-NEXT: addiw a3, s1, -1536
+; NOREMAT-NEXT: sd a3, 40(sp) # 8-byte Folded Spill
; NOREMAT-NEXT: add a3, a0, a3
-; NOREMAT-NEXT: vle32.v v12, (a3)
-; NOREMAT-NEXT: sf.vc.vv 3, 0, v8, v14
; NOREMAT-NEXT: vle32.v v8, (a3)
-; NOREMAT-NEXT: li a5, 31
-; NOREMAT-NEXT: slli a3, a5, 9
-; NOREMAT-NEXT: sd a3, 64(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT: vle32.v v24, (a3)
+; NOREMAT-NEXT: csrr a3, vlenb
+; NOREMAT-NEXT: slli a3, a3, 2
+; NOREMAT-NEXT: add a3, sp, a3
+; NOREMAT-NEXT: addi a3, a3, 288
+; NOREMAT-NEXT: vl2r.v v10, (a3) # Unknown-size Folded Reload
+; NOREMAT-NEXT: sf.vc.vv 3, 0, v10, v30
+; NOREMAT-NEXT: slli a3, a7, 10
+; NOREMAT-NEXT: sd a3, 32(sp) # 8-byte Folded Spill
; NOREMAT-NEXT: add a3, a0, a3
-; NOREMAT-NEXT: vle32.v v14, (a3)
-; NOREMAT-NEXT: sf.vc.vv 3, 0, v10, v12
; NOREMAT-NEXT: vle32.v v10, (a3)
-; NOREMAT-NEXT: lui a4, 4
-; NOREMAT-NEXT: add a3, a0, a4
-; NOREMAT-NEXT: vle32.v v12, (a3)
-; NOREMAT-NEXT: sf.vc.vv 3, 0, v8, v14
-; NOREMAT-NEXT: vle32.v v8, (a3)
-; NOREMAT-NEXT: addiw a3, a4, 512
-; NOREMAT-NEXT: sd a3, 56(sp) # 8-byte Folded Spill
-; NOREMAT-NEXT: add a3, a0, a3
; NOREMAT-NEXT: vle32.v v14, (a3)
-; NOREMAT-NEXT: sf.vc.vv 3, 0, v10, v12
-; NOREMAT-NEXT: vle32.v v10, (a3)
-; NOREMAT-NEXT: slli a3, a7, 10
-; NOREMAT-NEXT: sd a3, 48(sp) # 8-byte Folded Spill
-; NOREMAT-NEXT: add a3, a0, a3
-; NOREMAT-NEXT: vle32.v v12, (a3)
-; NOREMAT-NEXT: sf.vc.vv 3, 0, v8, v14
-; NOREMAT-NEXT: vle32.v v8, (a3)
-; NOREMAT-NEXT: addiw a3, a4, 1536
-; NOREMAT-NEXT: sd a3, 40(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT: sf.vc.vv 3, 0, v16, v0
+; NOREMAT-NEXT: addiw a3, s1, -512
+; NOREMAT-NEXT: sd a3, 24(sp) # 8-byte Folded Spill
; NOREMAT-NEXT: add a3, a0, a3
-; NOREMAT-NEXT: vle32.v v14, (a3)
-; NOREMAT-NEXT: sf.vc.vv 3, 0, v10, v12
-; NOREMAT-NEXT: vle32.v v10, (a3)
-; NOREMAT-NEXT: slli a2, a2, 11
-; NOREMAT-NEXT: sd a2, 32(sp) # 8-byte Folded Spill
-; NOREMAT-NEXT: add a2, a0, a2
-; NOREMAT-NEXT: vle32.v v12, (a2)
-; NOREMAT-NEXT: sf.vc.vv 3, 0, v8, v14
-; NOREMAT-NEXT: vle32.v v8, (a2)
-; NOREMAT-NEXT: lui s1, 5
-; NOREMAT-NEXT: addiw a2, s1, -1536
-; NOREMAT-NEXT: sd a2, 24(sp) # 8-byte Folded Spill
-; NOREMAT-NEXT: add a2, a0, a2
-; NOREMAT-NEXT: vle32.v v14, (a2)
-; NOREMAT-NEXT: sf.vc.vv 3, 0, v10, v12
-; NOREMAT-NEXT: vle32.v v10, (a2)
-; NOREMAT-NEXT: slli a2, a6, 10
-; NOREMAT-NEXT: sd a2, 16(sp) # 8-byte Folded Spill
-; NOREMAT-NEXT: add a2, a0, a2
-; NOREMAT-NEXT: vle32.v v12, (a2)
-; NOREMAT-NEXT: sf.vc.vv 3, 0, v8, v14
-; NOREMAT-NEXT: vle32.v v8, (a2)
-; NOREMAT-NEXT: addiw a2, s1, -512
-; NOREMAT-NEXT: sd a2, 8(sp) # 8-byte Folded Spill
-; NOREMAT-NEXT: add a2, a0, a2
-; NOREMAT-NEXT: vle32.v v14, (a2)
-; NOREMAT-NEXT: sf.vc.vv 3, 0, v10, v12
-; NOREMAT-NEXT: vle32.v v10, (a2)
-; NOREMAT-NEXT: add a2, a0, s1
-; NOREMAT-NEXT: vle32.v v12, (a2)
-; NOREMAT-NEXT: sf.vc.vv 3, 0, v8, v14
-; NOREMAT-NEXT: vle32.v v8, (a2)
+; NOREMAT-NEXT: vle32.v v0, (a3)
+; NOREMAT-NEXT: vle32.v v16, (a3)
+; NOREMAT-NEXT: csrr a3, vlenb
+; NOREMAT-NEXT: slli a3, a3, 1
+; NOREMAT-NEXT: add a3, sp, a3
+; NOREMAT-NEXT: addi a3, a3, 288
+; NOREMAT-NEXT: vl2r.v v26, (a3) # Unknown-size Folded Reload
+; NOREMAT-NEXT: sf.vc.vv 3, 0, v26, v2
+; NOREMAT-NEXT: add a3, a0, s1
+; NOREMAT-NEXT: vle32.v v26, (a3)
+; NOREMAT-NEXT: vle32.v v28, (a3)
+; NOREMAT-NEXT: csrr a3, vlenb
+; NOREMAT-NEXT: slli a3, a3, 2
+; NOREMAT-NEXT: add a3, sp, a3
+; NOREMAT-NEXT: addi a3, a3, 288
+; NOREMAT-NEXT: vs2r.v v28, (a3) # Unknown-size Folded Spill
+; NOREMAT-NEXT: sf.vc.vv 3, 0, v18, v4
; NOREMAT-NEXT: addiw ra, s1, 512
-; NOREMAT-NEXT: add a2, a0, ra
-; NOREMAT-NEXT: vle32.v v14, (a2)
-; NOREMAT-NEXT: sf.vc.vv 3, 0, v10, v12
-; NOREMAT-NEXT: vle32.v v10, (a2)
+; NOREMAT-NEXT: add a3, a0, ra
+; NOREMAT-NEXT: vle32.v v28, (a3)
+; NOREMAT-NEXT: vle32.v v30, (a3)
+; NOREMAT-NEXT: sf.vc.vv 3, 0, v20, v6
; NOREMAT-NEXT: slli s11, s10, 10
-; NOREMAT-NEXT: add a2, a0, s11
-; NOREMAT-NEXT: vle32.v v12, (a2)
-; NOREMAT-NEXT: sf.vc.vv 3, 0, v8, v14
-; NOREMAT-NEXT: vle32.v v8, (a2)
+; NOREMAT-NEXT: add a3, a0, s11
+; NOREMAT-NEXT: vle32.v v2, (a3)
+; NOREMAT-NEXT: vle32.v v18, (a3)
+; NOREMAT-NEXT: sf.vc.vv 3, 0, v22, v12
; NOREMAT-NEXT: addiw s10, s1, 1536
-; NOREMAT-NEXT: add a2, a0, s10
-; NOREMAT-NEXT: vle32.v v14, (a2)
-; NOREMAT-NEXT: sf.vc.vv 3, 0, v10, v12
-; NOREMAT-NEXT: vle32.v v10, (a2)
+; NOREMAT-NEXT: add a3, a0, s10
+; NOREMAT-NEXT: vle32.v v4, (a3)
+; NOREMAT-NEXT: vle32.v v20, (a3)
+; NOREMAT-NEXT: addi a3, sp, 288
+; NOREMAT-NEXT: vl2r.v v12, (a3) # Unknown-size Folded Reload
+; NOREMAT-NEXT: sf.vc.vv 3, 0, v12, v8
; NOREMAT-NEXT: slli s9, s8, 11
-; NOREMAT-NEXT: add a2, a0, s9
-; NOREMAT-NEXT: vle32.v v12, (a2)
-; NOREMAT-NEXT: sf.vc.vv 3, 0, v8, v14
-; NOREMAT-NEXT: vle32.v v8, (a2)
-; NOREMAT-NEXT: lui t1, 6
-; NOREMAT-NEXT: addiw s8, t1, -1536
-; NOREMAT-NEXT: add a2, a0, s8
-; NOREMAT-NEXT: vle32.v v14, (a2)
-; NOREMAT-NEXT: sf.vc.vv 3, 0, v10, v12
-; NOREMAT-NEXT: vle32.v v10, (a2)
+; NOREMAT-NEXT: add a3, a0, s9
+; NOREMAT-NEXT: vle32.v v6, (a3)
+; NOREMAT-NEXT: vle32.v v12, (a3)
+; NOREMAT-NEXT: sf.vc.vv 3, 0, v24, v10
+; NOREMAT-NEXT: lui t0, 6
+; NOREMAT-NEXT: addiw s8, t0, -1536
+; NOREMAT-NEXT: add a3, a0, s8
+; NOREMAT-NEXT: vle32.v v8, (a3)
+; NOREMAT-NEXT: vle32.v v22, (a3)
+; NOREMAT-NEXT: sf.vc.vv 3, 0, v14, v0
; NOREMAT-NEXT: slli s7, s6, 10
-; NOREMAT-NEXT: add a2, a0, s7
-; NOREMAT-NEXT: vle32.v v12, (a2)
-; NOREMAT-NEXT: sf.vc.vv 3, 0, v8, v14
-; NOREMAT-NEXT: vle32.v v8, (a2)
-; NOREMAT-NEXT: addiw s6, t1, -512
-; NOREMAT-NEXT: add a2, a0, s6
-; NOREMAT-NEXT: vle32.v v14, (a2)
-; NOREMAT-NEXT: sf.vc.vv 3, 0, v10, v12
-; NOREMAT-NEXT: vle32.v v10, (a2)
-; NOREMAT-NEXT: add a2, a0, t1
-; NOREMAT-NEXT: vle32.v v12, (a2)
-; NOREMAT-NEXT: sf.vc.vv 3, 0, v8, v14
-; NOREMAT-NEXT: vle32.v v8, (a2)
-; NOREMAT-NEXT: addiw s5, t1, 512
-; NOREMAT-NEXT: add a2, a0, s5
-; NOREMAT-NEXT: vle32.v v14, (a2)
-; NOREMAT-NEXT: sf.vc.vv 3, 0, v10, v12
-; NOREMAT-NEXT: vle32.v v10, (a2)
+; NOREMAT-NEXT: add a3, a0, s7
+; NOREMAT-NEXT: vle32.v v10, (a3)
+; NOREMAT-NEXT: vle32.v v14, (a3)
+; NOREMAT-NEXT: sf.vc.vv 3, 0, v16, v26
+; NOREMAT-NEXT: addiw s6, t0, -512
+; NOREMAT-NEXT: add a3, a0, s6
+; NOREMAT-NEXT: vle32.v v0, (a3)
+; NOREMAT-NEXT: vle32.v v16, (a3)
+; NOREMAT-NEXT: csrr a3, vlenb
+; NOREMAT-NEXT: slli a3, a3, 2
+; NOREMAT-NEXT: add a3, sp, a3
+; NOREMAT-NEXT: addi a3, a3, 288
+; NOREMAT-NEXT: vl2r.v v24, (a3) # Unknown-size Folded Reload
+; NOREMAT-NEXT: sf.vc.vv 3, 0, v24, v28
+; NOREMAT-NEXT: add a3, a0, t0
+; NOREMAT-NEXT: vle32.v v24, (a3)
+; NOREMAT-NEXT: vle32.v v26, (a3)
+; NOREMAT-NEXT: csrr a3, vlenb
+; NOREMAT-NEXT: slli a3, a3, 2
+; NOREMAT-NEXT: add a3, sp, a3
+; NOREMAT-NEXT: addi a3, a3, 288
+; NOREMAT-NEXT: vs2r.v v26, (a3) # Unknown-size Folded Spill
+; NOREMAT-NEXT: sf.vc.vv 3, 0, v30, v2
+; NOREMAT-NEXT: addiw s5, t0, 512
+; NOREMAT-NEXT: add a3, a0, s5
+; NOREMAT-NEXT: vle32.v v26, (a3)
+; NOREMAT-NEXT: vle32.v v28, (a3)
+; NOREMAT-NEXT: csrr a3, vlenb
+; NOREMAT-NEXT: slli a3, a3, 1
+; NOREMAT-NEXT: add a3, sp, a3
+; NOREMAT-NEXT: addi a3, a3, 288
+; NOREMAT-NEXT: vs2r.v v28, (a3) # Unknown-size Folded Spill
+; NOREMAT-NEXT: sf.vc.vv 3, 0, v18, v4
; NOREMAT-NEXT: slli s4, s3, 10
-; NOREMAT-NEXT: add a2, a0, s4
-; NOREMAT-NEXT: vle32.v v12, (a2)
-; NOREMAT-NEXT: sf.vc.vv 3, 0, v8, v14
-; NOREMAT-NEXT: vle32.v v8, (a2)
-; NOREMAT-NEXT: addiw s3, t1, 1536
-; NOREMAT-NEXT: add a2, a0, s3
-; NOREMAT-NEXT: vle32.v v14, (a2)
-; NOREMAT-NEXT: sf.vc.vv 3, 0, v10, v12
-; NOREMAT-NEXT: vle32.v v10, (a2)
+; NOREMAT-NEXT: add a3, a0, s4
+; NOREMAT-NEXT: vle32.v v28, (a3)
+; NOREMAT-NEXT: vle32.v v18, (a3)
+; NOREMAT-NEXT: sf.vc.vv 3, 0, v20, v6
+; NOREMAT-NEXT: addiw s3, t0, 1536
+; NOREMAT-NEXT: add a3, a0, s3
+; NOREMAT-NEXT: vle32.v v30, (a3)
+; NOREMAT-NEXT: vle32.v v20, (a3)
+; NOREMAT-NEXT: sf.vc.vv 3, 0, v12, v8
; NOREMAT-NEXT: slli s2, s2, 11
-; NOREMAT-NEXT: add a2, a0, s2
-; NOREMAT-NEXT: vle32.v v12, (a2)
-; NOREMAT-NEXT: sf.vc.vv 3, 0, v8, v14
-; NOREMAT-NEXT: vle32.v v8, (a2)
+; NOREMAT-NEXT: add a3, a0, s2
+; NOREMAT-NEXT: vle32.v v2, (a3)
+; NOREMAT-NEXT: vle32.v v12, (a3)
+; NOREMAT-NEXT: sf.vc.vv 3, 0, v22, v10
; NOREMAT-NEXT: lui a3, 7
; NOREMAT-NEXT: addiw s0, a3, -1536
-; NOREMAT-NEXT: add a2, a0, s0
-; NOREMAT-NEXT: vle32.v v14, (a2)
-; NOREMAT-NEXT: sf.vc.vv 3, 0, v10, v12
-; NOREMAT-NEXT: vle32.v v10, (a2)
+; NOREMAT-NEXT: add a4, a0, s0
+; NOREMAT-NEXT: vle32.v v4, (a4)
+; NOREMAT-NEXT: vle32.v v22, (a4)
+; NOREMAT-NEXT: sf.vc.vv 3, 0, v14, v0
; NOREMAT-NEXT: slli t6, t5, 10
-; NOREMAT-NEXT: add a2, a0, t6
-; NOREMAT-NEXT: vle32.v v12, (a2)
-; NOREMAT-NEXT: sf.vc.vv 3, 0, v8, v14
-; NOREMAT-NEXT: vle32.v v8, (a2)
+; NOREMAT-NEXT: add a4, a0, t6
+; NOREMAT-NEXT: vle32.v v6, (a4)
+; NOREMAT-NEXT: vle32.v v14, (a4)
+; NOREMAT-NEXT: sf.vc.vv 3, 0, v16, v24
; NOREMAT-NEXT: addiw t5, a3, -512
-; NOREMAT-NEXT: add a2, a0, t5
-; NOREMAT-NEXT: vle32.v v14, (a2)
-; NOREMAT-NEXT: sf.vc.vv 3, 0, v10, v12
-; NOREMAT-NEXT: vle32.v v10, (a2)
-; NOREMAT-NEXT: add a2, a0, a3
-; NOREMAT-NEXT: vle32.v v12, (a2)
-; NOREMAT-NEXT: sf.vc.vv 3, 0, v8, v14
-; NOREMAT-NEXT: vle32.v v8, (a2)
+; NOREMAT-NEXT: add a4, a0, t5
+; NOREMAT-NEXT: vle32.v v0, (a4)
+; NOREMAT-NEXT: vle32.v v16, (a4)
+; NOREMAT-NEXT: csrr a4, vlenb
+; NOREMAT-NEXT: slli a4, a4, 2
+; NOREMAT-NEXT: add a4, sp, a4
+; NOREMAT-NEXT: addi a4, a4, 288
+; NOREMAT-NEXT: vl2r.v v8, (a4) # Unknown-size Folded Reload
+; NOREMAT-NEXT: sf.vc.vv 3, 0, v8, v26
+; NOREMAT-NEXT: add a4, a0, a3
+; NOREMAT-NEXT: vle32.v v26, (a4)
+; NOREMAT-NEXT: vle32.v v8, (a4)
+; NOREMAT-NEXT: csrr a4, vlenb
+; NOREMAT-NEXT: slli a4, a4, 1
+; NOREMAT-NEXT: add a4, sp, a4
+; NOREMAT-NEXT: addi a4, a4, 288
+; NOREMAT-NEXT: vl2r.v v10, (a4) # Unknown-size Folded Reload
+; NOREMAT-NEXT: sf.vc.vv 3, 0, v10, v28
; NOREMAT-NEXT: addiw t4, a3, 512
-; NOREMAT-NEXT: add a2, a0, t4
-; NOREMAT-NEXT: vle32.v v14, (a2)
-; NOREMAT-NEXT: sf.vc.vv 3, 0, v10, v12
-; NOREMAT-NEXT: vle32.v v10, (a2)
+; NOREMAT-NEXT: add a4, a0, t4
+; NOREMAT-NEXT: vle32.v v10, (a4)
+; NOREMAT-NEXT: vle32.v v24, (a4)
+; NOREMAT-NEXT: sf.vc.vv 3, 0, v18, v30
; NOREMAT-NEXT: slli t3, t2, 10
-; NOREMAT-NEXT: add a2, a0, t3
-; NOREMAT-NEXT: vle32.v v12, (a2)
-; NOREMAT-NEXT: sf.vc.vv 3, 0, v8, v14
-; NOREMAT-NEXT: vle32.v v8, (a2)
+; NOREMAT-NEXT: add a4, a0, t3
+; NOREMAT-NEXT: vle32.v v18, (a4)
+; NOREMAT-NEXT: vle32.v v28, (a4)
+; NOREMAT-NEXT: sf.vc.vv 3, 0, v20, v2
; NOREMAT-NEXT: addiw t2, a3, 1536
-; NOREMAT-NEXT: add a2, a0, t2
-; NOREMAT-NEXT: vle32.v v14, (a2)
-; NOREMAT-NEXT: sf.vc.vv 3, 0, v10, v12
-; NOREMAT-NEXT: vle32.v v10, (a2)
-; NOREMAT-NEXT: slli t0, t0, 11
-; NOREMAT-NEXT: add a2, a0, t0
+; NOREMAT-NEXT: add a4, a0, t2
+; NOREMAT-NEXT: vle32.v v20, (a4)
+; NOREMAT-NEXT: vle32.v v30, (a4)
+; NOREMAT-NEXT: sf.vc.vv 3, 0, v12, v4
+; NOREMAT-NEXT: slli t1, a2, 11
+; NOREMAT-NEXT: add a2, a0, t1
; NOREMAT-NEXT: vle32.v v12, (a2)
-; NOREMAT-NEXT: sf.vc.vv 3, 0, v8, v14
-; NOREMAT-NEXT: vle32.v v8, (a2)
+; NOREMAT-NEXT: vle32.v v2, (a2)
+; NOREMAT-NEXT: sf.vc.vv 3, 0, v22, v6
; NOREMAT-NEXT: lui a2, 8
; NOREMAT-NEXT: addiw a7, a2, -1536
; NOREMAT-NEXT: add a4, a0, a7
-; NOREMAT-NEXT: vle32.v v14, (a4)
-; NOREMAT-NEXT: sf.vc.vv 3, 0, v10, v12
-; NOREMAT-NEXT: vle32.v v10, (a4)
+; NOREMAT-NEXT: vle32.v v22, (a4)
+; NOREMAT-NEXT: vle32.v v4, (a4)
+; NOREMAT-NEXT: sf.vc.vv 3, 0, v14, v0
; NOREMAT-NEXT: slli a6, a5, 10
; NOREMAT-NEXT: add a4, a0, a6
-; NOREMAT-NEXT: vle32.v v12, (a4)
-; NOREMAT-NEXT: sf.vc.vv 3, 0, v8, v14
-; NOREMAT-NEXT: vle32.v v8, (a4)
+; NOREMAT-NEXT: vle32.v v14, (a4)
+; NOREMAT-NEXT: vle32.v v0, (a4)
+; NOREMAT-NEXT: sf.vc.vv 3, 0, v16, v26
; NOREMAT-NEXT: addiw a5, a2, -512
; NOREMAT-NEXT: add a4, a0, a5
-; NOREMAT-NEXT: vle32.v v14, (a4)
-; NOREMAT-NEXT: sf.vc.vv 3, 0, v10, v12
-; NOREMAT-NEXT: vle32.v v10, (a4)
+; NOREMAT-NEXT: vle32.v v16, (a4)
+; NOREMAT-NEXT: vle32.v v26, (a4)
; NOREMAT-NEXT: add a0, a0, a2
-; NOREMAT-NEXT: vle32.v v12, (a0)
-; NOREMAT-NEXT: sf.vc.vv 3, 0, v8, v14
-; NOREMAT-NEXT: sf.vc.vv 3, 0, v10, v12
+; NOREMAT-NEXT: vle32.v v6, (a0)
+; NOREMAT-NEXT: sf.vc.vv 3, 0, v8, v10
+; NOREMAT-NEXT: sf.vc.vv 3, 0, v24, v18
+; NOREMAT-NEXT: sf.vc.vv 3, 0, v28, v20
+; NOREMAT-NEXT: sf.vc.vv 3, 0, v30, v12
+; NOREMAT-NEXT: sf.vc.vv 3, 0, v2, v22
+; NOREMAT-NEXT: sf.vc.vv 3, 0, v4, v14
+; NOREMAT-NEXT: sf.vc.vv 3, 0, v0, v16
+; NOREMAT-NEXT: sf.vc.vv 3, 0, v26, v6
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
-; NOREMAT-NEXT: sf.vc.v.i 2, 0, v10, 0
; NOREMAT-NEXT: addi a0, a1, 1024
; NOREMAT-NEXT: vse32.v v8, (a0)
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT: addi a0, a1, 1536
-; NOREMAT-NEXT: vse32.v v10, (a0)
+; NOREMAT-NEXT: vse32.v v8, (a0)
+; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
+; NOREMAT-NEXT: ld a0, 272(sp) # 8-byte Folded Reload
+; NOREMAT-NEXT: add a0, a1, a0
+; NOREMAT-NEXT: vse32.v v8, (a0)
+; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
+; NOREMAT-NEXT: ld a0, 264(sp) # 8-byte Folded Reload
+; NOREMAT-NEXT: add a0, a1, a0
+; NOREMAT-NEXT: vse32.v v8, (a0)
+; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT: ld a0, 256(sp) # 8-byte Folded Reload
; NOREMAT-NEXT: add a0, a1, a0
-; NOREMAT-NEXT: sf.vc.v.i 2, 0, v10, 0
; NOREMAT-NEXT: vse32.v v8, (a0)
+; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT: ld a0, 248(sp) # 8-byte Folded Reload
; NOREMAT-NEXT: add a0, a1, a0
+; NOREMAT-NEXT: vse32.v v8, (a0)
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
-; NOREMAT-NEXT: vse32.v v10, (a0)
-; NOREMAT-NEXT: ld a0, 240(sp) # 8-byte Folded Reload
+; NOREMAT-NEXT: lui a0, 1
; NOREMAT-NEXT: add a0, a1, a0
-; NOREMAT-NEXT: sf.vc.v.i 2, 0, v10, 0
; NOREMAT-NEXT: vse32.v v8, (a0)
-; NOREMAT-NEXT: ld a0, 232(sp) # 8-byte Folded Reload
+; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
+; NOREMAT-NEXT: ld a0, 240(sp) # 8-byte Folded Reload
; NOREMAT-NEXT: add a0, a1, a0
+; NOREMAT-NEXT: vse32.v v8, (a0)
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
-; NOREMAT-NEXT: vse32.v v10, (a0)
-; NOREMAT-NEXT: lui a0, 1
+; NOREMAT-NEXT: ld a0, 232(sp) # 8-byte Folded Reload
; NOREMAT-NEXT: add a0, a1, a0
-; NOREMAT-NEXT: sf.vc.v.i 2, 0, v10, 0
; NOREMAT-NEXT: vse32.v v8, (a0)
+; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT: ld a0, 224(sp) # 8-byte Folded Reload
; NOREMAT-NEXT: add a0, a1, a0
+; NOREMAT-NEXT: vse32.v v8, (a0)
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
-; NOREMAT-NEXT: vse32.v v10, (a0)
; NOREMAT-NEXT: ld a0, 216(sp) # 8-byte Folded Reload
; NOREMAT-NEXT: add a0, a1, a0
-; NOREMAT-NEXT: sf.vc.v.i 2, 0, v10, 0
; NOREMAT-NEXT: vse32.v v8, (a0)
+; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT: ld a0, 208(sp) # 8-byte Folded Reload
; NOREMAT-NEXT: add a0, a1, a0
+; NOREMAT-NEXT: vse32.v v8, (a0)
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
-; NOREMAT-NEXT: vse32.v v10, (a0)
; NOREMAT-NEXT: ld a0, 200(sp) # 8-byte Folded Reload
; NOREMAT-NEXT: add a0, a1, a0
-; NOREMAT-NEXT: sf.vc.v.i 2, 0, v10, 0
; NOREMAT-NEXT: vse32.v v8, (a0)
+; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT: ld a0, 192(sp) # 8-byte Folded Reload
; NOREMAT-NEXT: add a0, a1, a0
+; NOREMAT-NEXT: vse32.v v8, (a0)
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
-; NOREMAT-NEXT: vse32.v v10, (a0)
-; NOREMAT-NEXT: ld a0, 184(sp) # 8-byte Folded Reload
+; NOREMAT-NEXT: lui a0, 2
; NOREMAT-NEXT: add a0, a1, a0
-; NOREMAT-NEXT: sf.vc.v.i 2, 0, v10, 0
; NOREMAT-NEXT: vse32.v v8, (a0)
-; NOREMAT-NEXT: ld a0, 176(sp) # 8-byte Folded Reload
+; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
+; NOREMAT-NEXT: ld a0, 184(sp) # 8-byte Folded Reload
; NOREMAT-NEXT: add a0, a1, a0
+; NOREMAT-NEXT: vse32.v v8, (a0)
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
-; NOREMAT-NEXT: vse32.v v10, (a0)
-; NOREMAT-NEXT: lui a0, 2
+; NOREMAT-NEXT: ld a0, 176(sp) # 8-byte Folded Reload
; NOREMAT-NEXT: add a0, a1, a0
-; NOREMAT-NEXT: sf.vc.v.i 2, 0, v10, 0
; NOREMAT-NEXT: vse32.v v8, (a0)
+; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT: ld a0, 168(sp) # 8-byte Folded Reload
; NOREMAT-NEXT: add a0, a1, a0
+; NOREMAT-NEXT: vse32.v v8, (a0)
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
-; NOREMAT-NEXT: vse32.v v10, (a0)
; NOREMAT-NEXT: ld a0, 160(sp) # 8-byte Folded Reload
; NOREMAT-NEXT: add a0, a1, a0
-; NOREMAT-NEXT: sf.vc.v.i 2, 0, v10, 0
; NOREMAT-NEXT: vse32.v v8, (a0)
+; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT: ld a0, 152(sp) # 8-byte Folded Reload
; NOREMAT-NEXT: add a0, a1, a0
+; NOREMAT-NEXT: vse32.v v8, (a0)
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
-; NOREMAT-NEXT: vse32.v v10, (a0)
; NOREMAT-NEXT: ld a0, 144(sp) # 8-byte Folded Reload
; NOREMAT-NEXT: add a0, a1, a0
-; NOREMAT-NEXT: sf.vc.v.i 2, 0, v10, 0
; NOREMAT-NEXT: vse32.v v8, (a0)
+; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT: ld a0, 136(sp) # 8-byte Folded Reload
; NOREMAT-NEXT: add a0, a1, a0
+; NOREMAT-NEXT: vse32.v v8, (a0)
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
-; NOREMAT-NEXT: vse32.v v10, (a0)
-; NOREMAT-NEXT: ld a0, 128(sp) # 8-byte Folded Reload
+; NOREMAT-NEXT: lui a0, 3
; NOREMAT-NEXT: add a0, a1, a0
-; NOREMAT-NEXT: sf.vc.v.i 2, 0, v10, 0
; NOREMAT-NEXT: vse32.v v8, (a0)
-; NOREMAT-NEXT: ld a0, 120(sp) # 8-byte Folded Reload
+; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
+; NOREMAT-NEXT: ld a0, 128(sp) # 8-byte Folded Reload
; NOREMAT-NEXT: add a0, a1, a0
+; NOREMAT-NEXT: vse32.v v8, (a0)
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
-; NOREMAT-NEXT: vse32.v v10, (a0)
-; NOREMAT-NEXT: lui a0, 3
+; NOREMAT-NEXT: ld a0, 120(sp) # 8-byte Folded Reload
; NOREMAT-NEXT: add a0, a1, a0
-; NOREMAT-NEXT: sf.vc.v.i 2, 0, v10, 0
; NOREMAT-NEXT: vse32.v v8, (a0)
+; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT: ld a0, 112(sp) # 8-byte Folded Reload
; NOREMAT-NEXT: add a0, a1, a0
+; NOREMAT-NEXT: vse32.v v8, (a0)
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
-; NOREMAT-NEXT: vse32.v v10, (a0)
; NOREMAT-NEXT: ld a0, 104(sp) # 8-byte Folded Reload
; NOREMAT-NEXT: add a0, a1, a0
-; NOREMAT-NEXT: sf.vc.v.i 2, 0, v10, 0
; NOREMAT-NEXT: vse32.v v8, (a0)
+; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT: ld a0, 96(sp) # 8-byte Folded Reload
; NOREMAT-NEXT: add a0, a1, a0
+; NOREMAT-NEXT: vse32.v v8, (a0)
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
-; NOREMAT-NEXT: vse32.v v10, (a0)
; NOREMAT-NEXT: ld a0, 88(sp) # 8-byte Folded Reload
; NOREMAT-NEXT: add a0, a1, a0
-; NOREMAT-NEXT: sf.vc.v.i 2, 0, v10, 0
; NOREMAT-NEXT: vse32.v v8, (a0)
+; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT: ld a0, 80(sp) # 8-byte Folded Reload
; NOREMAT-NEXT: add a0, a1, a0
+; NOREMAT-NEXT: vse32.v v8, (a0)
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
-; NOREMAT-NEXT: vse32.v v10, (a0)
-; NOREMAT-NEXT: ld a0, 72(sp) # 8-byte Folded Reload
+; NOREMAT-NEXT: lui a0, 4
; NOREMAT-NEXT: add a0, a1, a0
-; NOREMAT-NEXT: sf.vc.v.i 2, 0, v10, 0
; NOREMAT-NEXT: vse32.v v8, (a0)
-; NOREMAT-NEXT: ld a0, 64(sp) # 8-byte Folded Reload
+; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
+; NOREMAT-NEXT: ld a0, 72(sp) # 8-byte Folded Reload
; NOREMAT-NEXT: add a0, a1, a0
+; NOREMAT-NEXT: vse32.v v8, (a0)
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
-; NOREMAT-NEXT: vse32.v v10, (a0)
-; NOREMAT-NEXT: lui a0, 4
+; NOREMAT-NEXT: ld a0, 64(sp) # 8-byte Folded Reload
; NOREMAT-NEXT: add a0, a1, a0
-; NOREMAT-NEXT: sf.vc.v.i 2, 0, v10, 0
; NOREMAT-NEXT: vse32.v v8, (a0)
+; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT: ld a0, 56(sp) # 8-byte Folded Reload
; NOREMAT-NEXT: add a0, a1, a0
+; NOREMAT-NEXT: vse32.v v8, (a0)
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
-; NOREMAT-NEXT: vse32.v v10, (a0)
; NOREMAT-NEXT: ld a0, 48(sp) # 8-byte Folded Reload
; NOREMAT-NEXT: add a0, a1, a0
-; NOREMAT-NEXT: sf.vc.v.i 2, 0, v10, 0
; NOREMAT-NEXT: vse32.v v8, (a0)
+; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT: ld a0, 40(sp) # 8-byte Folded Reload
; NOREMAT-NEXT: add a0, a1, a0
+; NOREMAT-NEXT: vse32.v v8, (a0)
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
-; NOREMAT-NEXT: vse32.v v10, (a0)
; NOREMAT-NEXT: ld a0, 32(sp) # 8-byte Folded Reload
; NOREMAT-NEXT: add a0, a1, a0
-; NOREMAT-NEXT: sf.vc.v.i 2, 0, v10, 0
; NOREMAT-NEXT: vse32.v v8, (a0)
-; NOREMAT-NEXT: ld a0, 24(sp) # 8-byte Folded Reload
-; NOREMAT-NEXT: add a0, a1, a0
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
-; NOREMAT-NEXT: vse32.v v10, (a0)
-; NOREMAT-NEXT: ld a0, 16(sp) # 8-byte Folded Reload
+; NOREMAT-NEXT: ld a0, 24(sp) # 8-byte Folded Reload
; NOREMAT-NEXT: add a0, a1, a0
-; NOREMAT-NEXT: sf.vc.v.i 2, 0, v10, 0
; NOREMAT-NEXT: vse32.v v8, (a0)
-; NOREMAT-NEXT: ld a0, 8(sp) # 8-byte Folded Reload
-; NOREMAT-NEXT: add a0, a1, a0
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
-; NOREMAT-NEXT: vse32.v v10, (a0)
; NOREMAT-NEXT: add s1, a1, s1
-; NOREMAT-NEXT: sf.vc.v.i 2, 0, v10, 0
; NOREMAT-NEXT: vse32.v v8, (s1)
+; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT: add ra, a1, ra
+; NOREMAT-NEXT: vse32.v v8, (ra)
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
-; NOREMAT-NEXT: vse32.v v10, (ra)
; NOREMAT-NEXT: add s11, a1, s11
-; NOREMAT-NEXT: sf.vc.v.i 2, 0, v10, 0
; NOREMAT-NEXT: vse32.v v8, (s11)
+; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT: add s10, a1, s10
+; NOREMAT-NEXT: vse32.v v8, (s10)
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
-; NOREMAT-NEXT: vse32.v v10, (s10)
; NOREMAT-NEXT: add s9, a1, s9
-; NOREMAT-NEXT: sf.vc.v.i 2, 0, v10, 0
; NOREMAT-NEXT: vse32.v v8, (s9)
+; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT: add s8, a1, s8
+; NOREMAT-NEXT: vse32.v v8, (s8)
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
-; NOREMAT-NEXT: vse32.v v10, (s8)
; NOREMAT-NEXT: add s7, a1, s7
-; NOREMAT-NEXT: sf.vc.v.i 2, 0, v10, 0
; NOREMAT-NEXT: vse32.v v8, (s7)
+; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT: add s6, a1, s6
+; NOREMAT-NEXT: vse32.v v8, (s6)
+; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
+; NOREMAT-NEXT: add t0, a1, t0
+; NOREMAT-NEXT: vse32.v v8, (t0)
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
-; NOREMAT-NEXT: vse32.v v10, (s6)
-; NOREMAT-NEXT: add t1, a1, t1
-; NOREMAT-NEXT: sf.vc.v.i 2, 0, v10, 0
-; NOREMAT-NEXT: vse32.v v8, (t1)
; NOREMAT-NEXT: add s5, a1, s5
+; NOREMAT-NEXT: vse32.v v8, (s5)
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
-; NOREMAT-NEXT: vse32.v v10, (s5)
; NOREMAT-NEXT: add s4, a1, s4
-; NOREMAT-NEXT: sf.vc.v.i 2, 0, v10, 0
; NOREMAT-NEXT: vse32.v v8, (s4)
+; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT: add s3, a1, s3
+; NOREMAT-NEXT: vse32.v v8, (s3)
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
-; NOREMAT-NEXT: vse32.v v10, (s3)
; NOREMAT-NEXT: add s2, a1, s2
-; NOREMAT-NEXT: sf.vc.v.i 2, 0, v10, 0
; NOREMAT-NEXT: vse32.v v8, (s2)
+; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT: add s0, a1, s0
+; NOREMAT-NEXT: vse32.v v8, (s0)
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
-; NOREMAT-NEXT: vse32.v v10, (s0)
; NOREMAT-NEXT: add t6, a1, t6
-; NOREMAT-NEXT: sf.vc.v.i 2, 0, v10, 0
; NOREMAT-NEXT: vse32.v v8, (t6)
+; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT: add t5, a1, t5
+; NOREMAT-NEXT: vse32.v v8, (t5)
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
-; NOREMAT-NEXT: vse32.v v10, (t5)
; NOREMAT-NEXT: add a3, a1, a3
-; NOREMAT-NEXT: sf.vc.v.i 2, 0, v10, 0
; NOREMAT-NEXT: vse32.v v8, (a3)
+; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT: add t4, a1, t4
+; NOREMAT-NEXT: vse32.v v8, (t4)
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
-; NOREMAT-NEXT: vse32.v v10, (t4)
; NOREMAT-NEXT: add t3, a1, t3
-; NOREMAT-NEXT: sf.vc.v.i 2, 0, v10, 0
; NOREMAT-NEXT: vse32.v v8, (t3)
+; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT: add t2, a1, t2
+; NOREMAT-NEXT: vse32.v v8, (t2)
+; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
+; NOREMAT-NEXT: add t1, a1, t1
+; NOREMAT-NEXT: vse32.v v8, (t1)
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
-; NOREMAT-NEXT: vse32.v v10, (t2)
-; NOREMAT-NEXT: add t0, a1, t0
-; NOREMAT-NEXT: sf.vc.v.i 2, 0, v10, 0
-; NOREMAT-NEXT: vse32.v v8, (t0)
; NOREMAT-NEXT: add a7, a1, a7
+; NOREMAT-NEXT: vse32.v v8, (a7)
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
-; NOREMAT-NEXT: vse32.v v10, (a7)
; NOREMAT-NEXT: add a6, a1, a6
-; NOREMAT-NEXT: sf.vc.v.i 2, 0, v10, 0
; NOREMAT-NEXT: vse32.v v8, (a6)
-; NOREMAT-NEXT: add a5, a1, a5
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
-; NOREMAT-NEXT: vse32.v v10, (a5)
-; NOREMAT-NEXT: add a0, a1, a2
+; NOREMAT-NEXT: add a5, a1, a5
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v10, 0
-; NOREMAT-NEXT: vse32.v v8, (a0)
-; NOREMAT-NEXT: addiw a0, a2, 512
-; NOREMAT-NEXT: add a0, a1, a0
+; NOREMAT-NEXT: vse32.v v8, (a5)
+; NOREMAT-NEXT: add a0, a1, a2
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT: vse32.v v10, (a0)
-; NOREMAT-NEXT: addiw a0, a2, 1024
+; NOREMAT-NEXT: addiw a0, a2, 512
; NOREMAT-NEXT: add a0, a1, a0
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v10, 0
; NOREMAT-NEXT: vse32.v v8, (a0)
-; NOREMAT-NEXT: addiw a0, a2, 1536
+; NOREMAT-NEXT: addiw a0, a2, 1024
; NOREMAT-NEXT: add a0, a1, a0
-; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT: vse32.v v10, (a0)
-; NOREMAT-NEXT: li a0, 17
-; NOREMAT-NEXT: slli a0, a0, 11
+; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
+; NOREMAT-NEXT: addiw a0, a2, 1536
; NOREMAT-NEXT: add a0, a1, a0
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v10, 0
; NOREMAT-NEXT: vse32.v v8, (a0)
+; NOREMAT-NEXT: li a0, 17
+; NOREMAT-NEXT: slli a0, a0, 11
+; NOREMAT-NEXT: add a0, a1, a0
+; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
+; NOREMAT-NEXT: vse32.v v10, (a0)
; NOREMAT-NEXT: lui a0, 9
; NOREMAT-NEXT: addiw a2, a0, -1536
; NOREMAT-NEXT: add a2, a1, a2
-; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
-; NOREMAT-NEXT: vse32.v v10, (a2)
-; NOREMAT-NEXT: addiw a2, a0, -1024
-; NOREMAT-NEXT: add a2, a1, a2
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v10, 0
; NOREMAT-NEXT: vse32.v v8, (a2)
-; NOREMAT-NEXT: addiw a2, a0, -512
+; NOREMAT-NEXT: addiw a2, a0, -1024
; NOREMAT-NEXT: add a2, a1, a2
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT: vse32.v v10, (a2)
-; NOREMAT-NEXT: add a2, a1, a0
+; NOREMAT-NEXT: addiw a2, a0, -512
+; NOREMAT-NEXT: add a2, a1, a2
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v10, 0
; NOREMAT-NEXT: vse32.v v8, (a2)
-; NOREMAT-NEXT: addiw a2, a0, 512
-; NOREMAT-NEXT: add a2, a1, a2
+; NOREMAT-NEXT: add a2, a1, a0
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT: vse32.v v10, (a2)
-; NOREMAT-NEXT: addiw a2, a0, 1024
+; NOREMAT-NEXT: addiw a2, a0, 512
; NOREMAT-NEXT: add a2, a1, a2
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v10, 0
; NOREMAT-NEXT: vse32.v v8, (a2)
+; NOREMAT-NEXT: addiw a2, a0, 1024
+; NOREMAT-NEXT: add a2, a1, a2
+; NOREMAT-NEXT: vse32.v v10, (a2)
+; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT: addiw a0, a0, 1536
; NOREMAT-NEXT: add a0, a1, a0
-; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
-; NOREMAT-NEXT: vse32.v v10, (a0)
+; NOREMAT-NEXT: sf.vc.v.i 2, 0, v10, 0
+; NOREMAT-NEXT: vse32.v v8, (a0)
; NOREMAT-NEXT: li a0, 19
; NOREMAT-NEXT: slli a0, a0, 11
; NOREMAT-NEXT: add a0, a1, a0
-; NOREMAT-NEXT: sf.vc.v.i 2, 0, v10, 0
-; NOREMAT-NEXT: vse32.v v8, (a0)
+; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
+; NOREMAT-NEXT: vse32.v v10, (a0)
; NOREMAT-NEXT: lui a0, 10
; NOREMAT-NEXT: addiw a2, a0, -1536
; NOREMAT-NEXT: add a2, a1, a2
-; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
-; NOREMAT-NEXT: vse32.v v10, (a2)
-; NOREMAT-NEXT: addiw a2, a0, -1024
-; NOREMAT-NEXT: add a2, a1, a2
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v10, 0
; NOREMAT-NEXT: vse32.v v8, (a2)
-; NOREMAT-NEXT: addiw a2, a0, -512
+; NOREMAT-NEXT: addiw a2, a0, -1024
; NOREMAT-NEXT: add a2, a1, a2
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; NOREMAT-NEXT: vse32.v v10, (a2)
-; NOREMAT-NEXT: add a2, a1, a0
+; NOREMAT-NEXT: addiw a2, a0, -512
+; NOREMAT-NEXT: add a2, a1, a2
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v10, 0
; NOREMAT-NEXT: vse32.v v8, (a2)
+; NOREMAT-NEXT: add a2, a1, a0
+; NOREMAT-NEXT: vse32.v v10, (a2)
; NOREMAT-NEXT: addiw a0, a0, 512
; NOREMAT-NEXT: add a0, a1, a0
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
-; NOREMAT-NEXT: vse32.v v10, (a0)
+; NOREMAT-NEXT: vse32.v v8, (a0)
; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
-; NOREMAT-NEXT: ld ra, 360(sp) # 8-byte Folded Reload
-; NOREMAT-NEXT: ld s0, 352(sp) # 8-byte Folded Reload
-; NOREMAT-NEXT: ld s1, 344(sp) # 8-byte Folded Reload
-; NOREMAT-NEXT: ld s2, 336(sp) # 8-byte Folded Reload
-; NOREMAT-NEXT: ld s3, 328(sp) # 8-byte Folded Reload
-; NOREMAT-NEXT: ld s4, 320(sp) # 8-byte Folded Reload
-; NOREMAT-NEXT: ld s5, 312(sp) # 8-byte Folded Reload
-; NOREMAT-NEXT: ld s6, 304(sp) # 8-byte Folded Reload
-; NOREMAT-NEXT: ld s7, 296(sp) # 8-byte Folded Reload
-; NOREMAT-NEXT: ld s8, 288(sp) # 8-byte Folded Reload
-; NOREMAT-NEXT: ld s9, 280(sp) # 8-byte Folded Reload
-; NOREMAT-NEXT: ld s10, 272(sp) # 8-byte Folded Reload
-; NOREMAT-NEXT: ld s11, 264(sp) # 8-byte Folded Reload
-; NOREMAT-NEXT: addi sp, sp, 368
+; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
+; NOREMAT-NEXT: csrr a0, vlenb
+; NOREMAT-NEXT: li a1, 6
+; NOREMAT-NEXT: mul a0, a0, a1
+; NOREMAT-NEXT: add sp, sp, a0
+; NOREMAT-NEXT: ld ra, 392(sp) # 8-byte Folded Reload
+; NOREMAT-NEXT: ld s0, 384(sp) # 8-byte Folded Reload
+; NOREMAT-NEXT: ld s1, 376(sp) # 8-byte Folded Reload
+; NOREMAT-NEXT: ld s2, 368(sp) # 8-byte Folded Reload
+; NOREMAT-NEXT: ld s3, 360(sp) # 8-byte Folded Reload
+; NOREMAT-NEXT: ld s4, 352(sp) # 8-byte Folded Reload
+; NOREMAT-NEXT: ld s5, 344(sp) # 8-byte Folded Reload
+; NOREMAT-NEXT: ld s6, 336(sp) # 8-byte Folded Reload
+; NOREMAT-NEXT: ld s7, 328(sp) # 8-byte Folded Reload
+; NOREMAT-NEXT: ld s8, 320(sp) # 8-byte Folded Reload
+; NOREMAT-NEXT: ld s9, 312(sp) # 8-byte Folded Reload
+; NOREMAT-NEXT: ld s10, 304(sp) # 8-byte Folded Reload
+; NOREMAT-NEXT: ld s11, 296(sp) # 8-byte Folded Reload
+; NOREMAT-NEXT: addi sp, sp, 400
; NOREMAT-NEXT: ret
;
; REMAT-LABEL: test:
@@ -864,512 +927,512 @@ define void @test(ptr %0, ptr %1, i64 %2) {
; REMAT-NEXT: add a2, a0, a2
; REMAT-NEXT: vle32.v v14, (a2)
; REMAT-NEXT: sf.vc.vv 3, 0, v10, v12
-; REMAT-NEXT: vle32.v v10, (a2)
+; REMAT-NEXT: vle32.v v12, (a2)
; REMAT-NEXT: li a2, 11
; REMAT-NEXT: slli a2, a2, 10
; REMAT-NEXT: add a2, a0, a2
-; REMAT-NEXT: vle32.v v12, (a2)
+; REMAT-NEXT: vle32.v v16, (a2)
; REMAT-NEXT: sf.vc.vv 3, 0, v8, v14
-; REMAT-NEXT: vle32.v v8, (a2)
+; REMAT-NEXT: vle32.v v10, (a2)
; REMAT-NEXT: li a2, 23
; REMAT-NEXT: slli a2, a2, 9
; REMAT-NEXT: add a2, a0, a2
-; REMAT-NEXT: vle32.v v14, (a2)
-; REMAT-NEXT: sf.vc.vv 3, 0, v10, v12
-; REMAT-NEXT: vle32.v v10, (a2)
+; REMAT-NEXT: vle32.v v26, (a2)
+; REMAT-NEXT: sf.vc.vv 3, 0, v12, v16
+; REMAT-NEXT: vle32.v v12, (a2)
; REMAT-NEXT: lui a2, 3
; REMAT-NEXT: add a2, a0, a2
-; REMAT-NEXT: vle32.v v12, (a2)
-; REMAT-NEXT: sf.vc.vv 3, 0, v8, v14
-; REMAT-NEXT: vle32.v v8, (a2)
+; REMAT-NEXT: vle32.v v28, (a2)
+; REMAT-NEXT: vle32.v v14, (a2)
; REMAT-NEXT: li a2, 25
; REMAT-NEXT: slli a2, a2, 9
; REMAT-NEXT: add a2, a0, a2
-; REMAT-NEXT: vle32.v v14, (a2)
-; REMAT-NEXT: sf.vc.vv 3, 0, v10, v12
-; REMAT-NEXT: vle32.v v10, (a2)
+; REMAT-NEXT: vle32.v v30, (a2)
+; REMAT-NEXT: vle32.v v16, (a2)
; REMAT-NEXT: li a2, 13
; REMAT-NEXT: slli a2, a2, 10
; REMAT-NEXT: add a2, a0, a2
-; REMAT-NEXT: vle32.v v12, (a2)
-; REMAT-NEXT: sf.vc.vv 3, 0, v8, v14
-; REMAT-NEXT: vle32.v v8, (a2)
+; REMAT-NEXT: vle32.v v0, (a2)
+; REMAT-NEXT: vle32.v v18, (a2)
; REMAT-NEXT: li a2, 27
; REMAT-NEXT: slli a2, a2, 9
; REMAT-NEXT: add a2, a0, a2
-; REMAT-NEXT: vle32.v v14, (a2)
-; REMAT-NEXT: sf.vc.vv 3, 0, v10, v12
-; REMAT-NEXT: vle32.v v10, (a2)
+; REMAT-NEXT: vle32.v v2, (a2)
+; REMAT-NEXT: vle32.v v20, (a2)
; REMAT-NEXT: li a2, 7
; REMAT-NEXT: slli a2, a2, 11
; REMAT-NEXT: add a2, a0, a2
-; REMAT-NEXT: vle32.v v12, (a2)
-; REMAT-NEXT: sf.vc.vv 3, 0, v8, v14
-; REMAT-NEXT: vle32.v v8, (a2)
+; REMAT-NEXT: vle32.v v4, (a2)
+; REMAT-NEXT: vle32.v v22, (a2)
; REMAT-NEXT: li a2, 29
; REMAT-NEXT: slli a2, a2, 9
; REMAT-NEXT: add a2, a0, a2
-; REMAT-NEXT: vle32.v v14, (a2)
-; REMAT-NEXT: sf.vc.vv 3, 0, v10, v12
-; REMAT-NEXT: vle32.v v10, (a2)
+; REMAT-NEXT: vle32.v v24, (a2)
+; REMAT-NEXT: vle32.v v8, (a2)
+; REMAT-NEXT: sf.vc.vv 3, 0, v10, v26
; REMAT-NEXT: li a2, 15
; REMAT-NEXT: slli a2, a2, 10
; REMAT-NEXT: add a2, a0, a2
-; REMAT-NEXT: vle32.v v12, (a2)
-; REMAT-NEXT: sf.vc.vv 3, 0, v8, v14
-; REMAT-NEXT: vle32.v v8, (a2)
+; REMAT-NEXT: vle32.v v26, (a2)
+; REMAT-NEXT: vle32.v v10, (a2)
+; REMAT-NEXT: sf.vc.vv 3, 0, v12, v28
; REMAT-NEXT: li a2, 31
; REMAT-NEXT: slli a2, a2, 9
; REMAT-NEXT: add a2, a0, a2
-; REMAT-NEXT: vle32.v v14, (a2)
-; REMAT-NEXT: sf.vc.vv 3, 0, v10, v12
-; REMAT-NEXT: vle32.v v10, (a2)
+; REMAT-NEXT: vle32.v v28, (a2)
+; REMAT-NEXT: vle32.v v12, (a2)
+; REMAT-NEXT: sf.vc.vv 3, 0, v14, v30
; REMAT-NEXT: lui a2, 4
; REMAT-NEXT: add a2, a0, a2
-; REMAT-NEXT: vle32.v v12, (a2)
-; REMAT-NEXT: sf.vc.vv 3, 0, v8, v14
-; REMAT-NEXT: vle32.v v8, (a2)
+; REMAT-NEXT: vle32.v v30, (a2)
+; REMAT-NEXT: vle32.v v14, (a2)
+; REMAT-NEXT: sf.vc.vv 3, 0, v16, v0
; REMAT-NEXT: lui a2, 4
; REMAT-NEXT: addiw a2, a2, 512
; REMAT-NEXT: add a2, a0, a2
-; REMAT-NEXT: vle32.v v14, (a2)
-; REMAT-NEXT: sf.vc.vv 3, 0, v10, v12
-; REMAT-NEXT: vle32.v v10, (a2)
+; REMAT-NEXT: vle32.v v0, (a2)
+; REMAT-NEXT: vle32.v v16, (a2)
+; REMAT-NEXT: sf.vc.vv 3, 0, v18, v2
; REMAT-NEXT: li a2, 17
; REMAT-NEXT: slli a2, a2, 10
; REMAT-NEXT: add a2, a0, a2
-; REMAT-NEXT: vle32.v v12, (a2)
-; REMAT-NEXT: sf.vc.vv 3, 0, v8, v14
-; REMAT-NEXT: vle32.v v8, (a2)
+; REMAT-NEXT: vle32.v v2, (a2)
+; REMAT-NEXT: vle32.v v18, (a2)
+; REMAT-NEXT: sf.vc.vv 3, 0, v20, v4
; REMAT-NEXT: lui a2, 4
; REMAT-NEXT: addiw a2, a2, 1536
; REMAT-NEXT: add a2, a0, a2
-; REMAT-NEXT: vle32.v v14, (a2)
-; REMAT-NEXT: sf.vc.vv 3, 0, v10, v12
-; REMAT-NEXT: vle32.v v10, (a2)
+; REMAT-NEXT: vle32.v v4, (a2)
+; REMAT-NEXT: vle32.v v20, (a2)
+; REMAT-NEXT: sf.vc.vv 3, 0, v22, v24
; REMAT-NEXT: li a2, 9
; REMAT-NEXT: slli a2, a2, 11
; REMAT-NEXT: add a2, a0, a2
-; REMAT-NEXT: vle32.v v12, (a2)
-; REMAT-NEXT: sf.vc.vv 3, 0, v8, v14
-; REMAT-NEXT: vle32.v v8, (a2)
+; REMAT-NEXT: vle32.v v24, (a2)
+; REMAT-NEXT: vle32.v v22, (a2)
+; REMAT-NEXT: sf.vc.vv 3, 0, v8, v26
; REMAT-NEXT: lui a2, 5
; REMAT-NEXT: addiw a2, a2, -1536
; REMAT-NEXT: add a2, a0, a2
-; REMAT-NEXT: vle32.v v14, (a2)
-; REMAT-NEXT: sf.vc.vv 3, 0, v10, v12
-; REMAT-NEXT: vle32.v v10, (a2)
+; REMAT-NEXT: vle32.v v26, (a2)
+; REMAT-NEXT: vle32.v v8, (a2)
+; REMAT-NEXT: sf.vc.vv 3, 0, v10, v28
; REMAT-NEXT: li a2, 19
; REMAT-NEXT: slli a2, a2, 10
; REMAT-NEXT: add a2, a0, a2
-; REMAT-NEXT: vle32.v v12, (a2)
-; REMAT-NEXT: sf.vc.vv 3, 0, v8, v14
-; REMAT-NEXT: vle32.v v8, (a2)
+; REMAT-NEXT: vle32.v v28, (a2)
+; REMAT-NEXT: vle32.v v10, (a2)
+; REMAT-NEXT: sf.vc.vv 3, 0, v12, v30
; REMAT-NEXT: lui ra, 5
; REMAT-NEXT: addiw ra, ra, -512
; REMAT-NEXT: add a2, a0, ra
-; REMAT-NEXT: vle32.v v14, (a2)
-; REMAT-NEXT: sf.vc.vv 3, 0, v10, v12
-; REMAT-NEXT: vle32.v v10, (a2)
+; REMAT-NEXT: vle32.v v30, (a2)
+; REMAT-NEXT: vle32.v v12, (a2)
+; REMAT-NEXT: sf.vc.vv 3, 0, v14, v0
; REMAT-NEXT: lui s11, 5
; REMAT-NEXT: add a2, a0, s11
-; REMAT-NEXT: vle32.v v12, (a2)
-; REMAT-NEXT: sf.vc.vv 3, 0, v8, v14
-; REMAT-NEXT: vle32.v v8, (a2)
+; REMAT-NEXT: vle32.v v0, (a2)
+; REMAT-NEXT: vle32.v v14, (a2)
+; REMAT-NEXT: sf.vc.vv 3, 0, v16, v2
; REMAT-NEXT: lui s10, 5
; REMAT-NEXT: addiw s10, s10, 512
; REMAT-NEXT: add a2, a0, s10
-; REMAT-NEXT: vle32.v v14, (a2)
-; REMAT-NEXT: sf.vc.vv 3, 0, v10, v12
-; REMAT-NEXT: vle32.v v10, (a2)
+; REMAT-NEXT: vle32.v v2, (a2)
+; REMAT-NEXT: vle32.v v16, (a2)
+; REMAT-NEXT: sf.vc.vv 3, 0, v18, v4
; REMAT-NEXT: li s9, 21
; REMAT-NEXT: slli s9, s9, 10
; REMAT-NEXT: add a2, a0, s9
-; REMAT-NEXT: vle32.v v12, (a2)
-; REMAT-NEXT: sf.vc.vv 3, 0, v8, v14
-; REMAT-NEXT: vle32.v v8, (a2)
+; REMAT-NEXT: vle32.v v4, (a2)
+; REMAT-NEXT: vle32.v v18, (a2)
+; REMAT-NEXT: sf.vc.vv 3, 0, v20, v24
; REMAT-NEXT: lui s8, 5
; REMAT-NEXT: addiw s8, s8, 1536
; REMAT-NEXT: add a2, a0, s8
-; REMAT-NEXT: vle32.v v14, (a2)
-; REMAT-NEXT: sf.vc.vv 3, 0, v10, v12
-; REMAT-NEXT: vle32.v v10, (a2)
+; REMAT-NEXT: vle32.v v24, (a2)
+; REMAT-NEXT: vle32.v v20, (a2)
+; REMAT-NEXT: sf.vc.vv 3, 0, v22, v26
; REMAT-NEXT: li s7, 11
; REMAT-NEXT: slli s7, s7, 11
; REMAT-NEXT: add a2, a0, s7
-; REMAT-NEXT: vle32.v v12, (a2)
-; REMAT-NEXT: sf.vc.vv 3, 0, v8, v14
-; REMAT-NEXT: vle32.v v8, (a2)
+; REMAT-NEXT: vle32.v v26, (a2)
+; REMAT-NEXT: vle32.v v22, (a2)
+; REMAT-NEXT: sf.vc.vv 3, 0, v8, v28
; REMAT-NEXT: lui s6, 6
; REMAT-NEXT: addiw s6, s6, -1536
; REMAT-NEXT: add a2, a0, s6
-; REMAT-NEXT: vle32.v v14, (a2)
-; REMAT-NEXT: sf.vc.vv 3, 0, v10, v12
-; REMAT-NEXT: vle32.v v10, (a2)
+; REMAT-NEXT: vle32.v v28, (a2)
+; REMAT-NEXT: vle32.v v8, (a2)
+; REMAT-NEXT: sf.vc.vv 3, 0, v10, v30
; REMAT-NEXT: li s5, 23
; REMAT-NEXT: slli s5, s5, 10
; REMAT-NEXT: add a2, a0, s5
-; REMAT-NEXT: vle32.v v12, (a2)
-; REMAT-NEXT: sf.vc.vv 3, 0, v8, v14
-; REMAT-NEXT: vle32.v v8, (a2)
+; REMAT-NEXT: vle32.v v30, (a2)
+; REMAT-NEXT: vle32.v v10, (a2)
+; REMAT-NEXT: sf.vc.vv 3, 0, v12, v0
; REMAT-NEXT: lui s4, 6
; REMAT-NEXT: addiw s4, s4, -512
; REMAT-NEXT: add a2, a0, s4
-; REMAT-NEXT: vle32.v v14, (a2)
-; REMAT-NEXT: sf.vc.vv 3, 0, v10, v12
-; REMAT-NEXT: vle32.v v10, (a2)
+; REMAT-NEXT: vle32.v v0, (a2)
+; REMAT-NEXT: vle32.v v12, (a2)
+; REMAT-NEXT: sf.vc.vv 3, 0, v14, v2
; REMAT-NEXT: lui s3, 6
; REMAT-NEXT: add a2, a0, s3
-; REMAT-NEXT: vle32.v v12, (a2)
-; REMAT-NEXT: sf.vc.vv 3, 0, v8, v14
-; REMAT-NEXT: vle32.v v8, (a2)
+; REMAT-NEXT: vle32.v v2, (a2)
+; REMAT-NEXT: vle32.v v14, (a2)
+; REMAT-NEXT: sf.vc.vv 3, 0, v16, v4
; REMAT-NEXT: lui s2, 6
; REMAT-NEXT: addiw s2, s2, 512
; REMAT-NEXT: add a2, a0, s2
-; REMAT-NEXT: vle32.v v14, (a2)
-; REMAT-NEXT: sf.vc.vv 3, 0, v10, v12
-; REMAT-NEXT: vle32.v v10, (a2)
+; REMAT-NEXT: vle32.v v4, (a2)
+; REMAT-NEXT: vle32.v v16, (a2)
+; REMAT-NEXT: sf.vc.vv 3, 0, v18, v24
; REMAT-NEXT: li s1, 25
; REMAT-NEXT: slli s1, s1, 10
; REMAT-NEXT: add a2, a0, s1
-; REMAT-NEXT: vle32.v v12, (a2)
-; REMAT-NEXT: sf.vc.vv 3, 0, v8, v14
-; REMAT-NEXT: vle32.v v8, (a2)
+; REMAT-NEXT: vle32.v v6, (a2)
+; REMAT-NEXT: vle32.v v18, (a2)
+; REMAT-NEXT: sf.vc.vv 3, 0, v20, v26
; REMAT-NEXT: lui s0, 6
; REMAT-NEXT: addiw s0, s0, 1536
; REMAT-NEXT: add a2, a0, s0
-; REMAT-NEXT: vle32.v v14, (a2)
-; REMAT-NEXT: sf.vc.vv 3, 0, v10, v12
-; REMAT-NEXT: vle32.v v10, (a2)
+; REMAT-NEXT: vle32.v v26, (a2)
+; REMAT-NEXT: vle32.v v20, (a2)
+; REMAT-NEXT: sf.vc.vv 3, 0, v22, v28
; REMAT-NEXT: li t6, 13
; REMAT-NEXT: slli t6, t6, 11
; REMAT-NEXT: add a2, a0, t6
-; REMAT-NEXT: vle32.v v12, (a2)
-; REMAT-NEXT: sf.vc.vv 3, 0, v8, v14
-; REMAT-NEXT: vle32.v v8, (a2)
+; REMAT-NEXT: vle32.v v28, (a2)
+; REMAT-NEXT: vle32.v v22, (a2)
+; REMAT-NEXT: sf.vc.vv 3, 0, v8, v30
; REMAT-NEXT: lui t5, 7
; REMAT-NEXT: addiw t5, t5, -1536
; REMAT-NEXT: add a2, a0, t5
-; REMAT-NEXT: vle32.v v14, (a2)
-; REMAT-NEXT: sf.vc.vv 3, 0, v10, v12
-; REMAT-NEXT: vle32.v v10, (a2)
+; REMAT-NEXT: vle32.v v30, (a2)
+; REMAT-NEXT: vle32.v v24, (a2)
+; REMAT-NEXT: sf.vc.vv 3, 0, v10, v0
; REMAT-NEXT: li t4, 27
; REMAT-NEXT: slli t4, t4, 10
; REMAT-NEXT: add a2, a0, t4
-; REMAT-NEXT: vle32.v v12, (a2)
-; REMAT-NEXT: sf.vc.vv 3, 0, v8, v14
-; REMAT-NEXT: vle32.v v8, (a2)
+; REMAT-NEXT: vle32.v v0, (a2)
+; REMAT-NEXT: vle32.v v10, (a2)
+; REMAT-NEXT: sf.vc.vv 3, 0, v12, v2
; REMAT-NEXT: lui t3, 7
; REMAT-NEXT: addiw t3, t3, -512
; REMAT-NEXT: add a2, a0, t3
-; REMAT-NEXT: vle32.v v14, (a2)
-; REMAT-NEXT: sf.vc.vv 3, 0, v10, v12
-; REMAT-NEXT: vle32.v v10, (a2)
+; REMAT-NEXT: vle32.v v2, (a2)
+; REMAT-NEXT: vle32.v v12, (a2)
+; REMAT-NEXT: sf.vc.vv 3, 0, v14, v4
; REMAT-NEXT: lui t2, 7
; REMAT-NEXT: add a2, a0, t2
-; REMAT-NEXT: vle32.v v12, (a2)
-; REMAT-NEXT: sf.vc.vv 3, 0, v8, v14
+; REMAT-NEXT: vle32.v v4, (a2)
; REMAT-NEXT: vle32.v v8, (a2)
+; REMAT-NEXT: sf.vc.vv 3, 0, v16, v6
; REMAT-NEXT: lui t1, 7
; REMAT-NEXT: addiw t1, t1, 512
; REMAT-NEXT: add a2, a0, t1
; REMAT-NEXT: vle32.v v14, (a2)
-; REMAT-NEXT: sf.vc.vv 3, 0, v10, v12
-; REMAT-NEXT: vle32.v v10, (a2)
+; REMAT-NEXT: vle32.v v16, (a2)
+; REMAT-NEXT: sf.vc.vv 3, 0, v18, v26
; REMAT-NEXT: li t0, 29
; REMAT-NEXT: slli t0, t0, 10
; REMAT-NEXT: add a2, a0, t0
-; REMAT-NEXT: vle32.v v12, (a2)
-; REMAT-NEXT: sf.vc.vv 3, 0, v8, v14
-; REMAT-NEXT: vle32.v v8, (a2)
+; REMAT-NEXT: vle32.v v18, (a2)
+; REMAT-NEXT: vle32.v v26, (a2)
+; REMAT-NEXT: sf.vc.vv 3, 0, v20, v28
; REMAT-NEXT: lui a7, 7
; REMAT-NEXT: addiw a7, a7, 1536
; REMAT-NEXT: add a2, a0, a7
-; REMAT-NEXT: vle32.v v14, (a2)
-; REMAT-NEXT: sf.vc.vv 3, 0, v10, v12
-; REMAT-NEXT: vle32.v v10, (a2)
+; REMAT-NEXT: vle32.v v20, (a2)
+; REMAT-NEXT: vle32.v v28, (a2)
+; REMAT-NEXT: sf.vc.vv 3, 0, v22, v30
; REMAT-NEXT: li a6, 15
; REMAT-NEXT: slli a6, a6, 11
; REMAT-NEXT: add a2, a0, a6
-; REMAT-NEXT: vle32.v v12, (a2)
-; REMAT-NEXT: sf.vc.vv 3, 0, v8, v14
-; REMAT-NEXT: vle32.v v8, (a2)
+; REMAT-NEXT: vle32.v v22, (a2)
+; REMAT-NEXT: vle32.v v30, (a2)
+; REMAT-NEXT: sf.vc.vv 3, 0, v24, v0
; REMAT-NEXT: lui a5, 8
; REMAT-NEXT: addiw a5, a5, -1536
; REMAT-NEXT: add a2, a0, a5
-; REMAT-NEXT: vle32.v v14, (a2)
-; REMAT-NEXT: sf.vc.vv 3, 0, v10, v12
-; REMAT-NEXT: vle32.v v10, (a2)
+; REMAT-NEXT: vle32.v v24, (a2)
+; REMAT-NEXT: vle32.v v0, (a2)
+; REMAT-NEXT: sf.vc.vv 3, 0, v10, v2
; REMAT-NEXT: li a4, 31
; REMAT-NEXT: slli a4, a4, 10
; REMAT-NEXT: add a2, a0, a4
-; REMAT-NEXT: vle32.v v12, (a2)
-; REMAT-NEXT: sf.vc.vv 3, 0, v8, v14
-; REMAT-NEXT: vle32.v v8, (a2)
+; REMAT-NEXT: vle32.v v10, (a2)
+; REMAT-NEXT: vle32.v v2, (a2)
+; REMAT-NEXT: sf.vc.vv 3, 0, v12, v4
; REMAT-NEXT: lui a3, 8
; REMAT-NEXT: addiw a3, a3, -512
; REMAT-NEXT: add a2, a0, a3
-; REMAT-NEXT: vle32.v v14, (a2)
-; REMAT-NEXT: sf.vc.vv 3, 0, v10, v12
-; REMAT-NEXT: vle32.v v10, (a2)
+; REMAT-NEXT: vle32.v v12, (a2)
+; REMAT-NEXT: vle32.v v4, (a2)
; REMAT-NEXT: lui a2, 8
; REMAT-NEXT: add a0, a0, a2
-; REMAT-NEXT: vle32.v v12, (a0)
+; REMAT-NEXT: vle32.v v6, (a0)
; REMAT-NEXT: sf.vc.vv 3, 0, v8, v14
-; REMAT-NEXT: sf.vc.vv 3, 0, v10, v12
+; REMAT-NEXT: sf.vc.vv 3, 0, v16, v18
+; REMAT-NEXT: sf.vc.vv 3, 0, v26, v20
+; REMAT-NEXT: sf.vc.vv 3, 0, v28, v22
+; REMAT-NEXT: sf.vc.vv 3, 0, v30, v24
+; REMAT-NEXT: sf.vc.vv 3, 0, v0, v10
+; REMAT-NEXT: sf.vc.vv 3, 0, v2, v12
+; REMAT-NEXT: sf.vc.vv 3, 0, v4, v6
; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
-; REMAT-NEXT: sf.vc.v.i 2, 0, v10, 0
; REMAT-NEXT: addi a0, a1, 1024
; REMAT-NEXT: vse32.v v8, (a0)
; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT: addi a0, a1, 1536
-; REMAT-NEXT: vse32.v v10, (a0)
+; REMAT-NEXT: vse32.v v8, (a0)
+; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT: li a0, 1
; REMAT-NEXT: slli a0, a0, 11
; REMAT-NEXT: add a0, a1, a0
-; REMAT-NEXT: sf.vc.v.i 2, 0, v10, 0
; REMAT-NEXT: vse32.v v8, (a0)
+; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT: li a0, 5
; REMAT-NEXT: slli a0, a0, 9
; REMAT-NEXT: add a0, a1, a0
+; REMAT-NEXT: vse32.v v8, (a0)
; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
-; REMAT-NEXT: vse32.v v10, (a0)
; REMAT-NEXT: li a0, 3
; REMAT-NEXT: slli a0, a0, 10
; REMAT-NEXT: add a0, a1, a0
-; REMAT-NEXT: sf.vc.v.i 2, 0, v10, 0
; REMAT-NEXT: vse32.v v8, (a0)
+; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT: li a0, 7
; REMAT-NEXT: slli a0, a0, 9
; REMAT-NEXT: add a0, a1, a0
+; REMAT-NEXT: vse32.v v8, (a0)
; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
-; REMAT-NEXT: vse32.v v10, (a0)
; REMAT-NEXT: lui a0, 1
; REMAT-NEXT: add a0, a1, a0
-; REMAT-NEXT: sf.vc.v.i 2, 0, v10, 0
; REMAT-NEXT: vse32.v v8, (a0)
+; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT: li a0, 9
; REMAT-NEXT: slli a0, a0, 9
; REMAT-NEXT: add a0, a1, a0
+; REMAT-NEXT: vse32.v v8, (a0)
; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
-; REMAT-NEXT: vse32.v v10, (a0)
; REMAT-NEXT: li a0, 5
; REMAT-NEXT: slli a0, a0, 10
; REMAT-NEXT: add a0, a1, a0
-; REMAT-NEXT: sf.vc.v.i 2, 0, v10, 0
; REMAT-NEXT: vse32.v v8, (a0)
+; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT: li a0, 11
; REMAT-NEXT: slli a0, a0, 9
; REMAT-NEXT: add a0, a1, a0
+; REMAT-NEXT: vse32.v v8, (a0)
; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
-; REMAT-NEXT: vse32.v v10, (a0)
; REMAT-NEXT: li a0, 3
; REMAT-NEXT: slli a0, a0, 11
; REMAT-NEXT: add a0, a1, a0
-; REMAT-NEXT: sf.vc.v.i 2, 0, v10, 0
; REMAT-NEXT: vse32.v v8, (a0)
+; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT: li a0, 13
; REMAT-NEXT: slli a0, a0, 9
; REMAT-NEXT: add a0, a1, a0
+; REMAT-NEXT: vse32.v v8, (a0)
; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
-; REMAT-NEXT: vse32.v v10, (a0)
; REMAT-NEXT: li a0, 7
; REMAT-NEXT: slli a0, a0, 10
; REMAT-NEXT: add a0, a1, a0
-; REMAT-NEXT: sf.vc.v.i 2, 0, v10, 0
; REMAT-NEXT: vse32.v v8, (a0)
+; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT: li a0, 15
; REMAT-NEXT: slli a0, a0, 9
; REMAT-NEXT: add a0, a1, a0
+; REMAT-NEXT: vse32.v v8, (a0)
; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
-; REMAT-NEXT: vse32.v v10, (a0)
; REMAT-NEXT: lui a0, 2
; REMAT-NEXT: add a0, a1, a0
-; REMAT-NEXT: sf.vc.v.i 2, 0, v10, 0
; REMAT-NEXT: vse32.v v8, (a0)
+; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT: li a0, 17
; REMAT-NEXT: slli a0, a0, 9
; REMAT-NEXT: add a0, a1, a0
+; REMAT-NEXT: vse32.v v8, (a0)
; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
-; REMAT-NEXT: vse32.v v10, (a0)
; REMAT-NEXT: li a0, 9
; REMAT-NEXT: slli a0, a0, 10
; REMAT-NEXT: add a0, a1, a0
-; REMAT-NEXT: sf.vc.v.i 2, 0, v10, 0
; REMAT-NEXT: vse32.v v8, (a0)
+; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT: li a0, 19
; REMAT-NEXT: slli a0, a0, 9
; REMAT-NEXT: add a0, a1, a0
+; REMAT-NEXT: vse32.v v8, (a0)
; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
-; REMAT-NEXT: vse32.v v10, (a0)
; REMAT-NEXT: li a0, 5
; REMAT-NEXT: slli a0, a0, 11
; REMAT-NEXT: add a0, a1, a0
-; REMAT-NEXT: sf.vc.v.i 2, 0, v10, 0
; REMAT-NEXT: vse32.v v8, (a0)
+; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT: li a0, 21
; REMAT-NEXT: slli a0, a0, 9
; REMAT-NEXT: add a0, a1, a0
+; REMAT-NEXT: vse32.v v8, (a0)
; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
-; REMAT-NEXT: vse32.v v10, (a0)
; REMAT-NEXT: li a0, 11
; REMAT-NEXT: slli a0, a0, 10
; REMAT-NEXT: add a0, a1, a0
-; REMAT-NEXT: sf.vc.v.i 2, 0, v10, 0
; REMAT-NEXT: vse32.v v8, (a0)
+; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT: li a0, 23
; REMAT-NEXT: slli a0, a0, 9
; REMAT-NEXT: add a0, a1, a0
+; REMAT-NEXT: vse32.v v8, (a0)
; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
-; REMAT-NEXT: vse32.v v10, (a0)
; REMAT-NEXT: lui a0, 3
; REMAT-NEXT: add a0, a1, a0
-; REMAT-NEXT: sf.vc.v.i 2, 0, v10, 0
; REMAT-NEXT: vse32.v v8, (a0)
+; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT: li a0, 25
; REMAT-NEXT: slli a0, a0, 9
; REMAT-NEXT: add a0, a1, a0
+; REMAT-NEXT: vse32.v v8, (a0)
; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
-; REMAT-NEXT: vse32.v v10, (a0)
; REMAT-NEXT: li a0, 13
; REMAT-NEXT: slli a0, a0, 10
; REMAT-NEXT: add a0, a1, a0
-; REMAT-NEXT: sf.vc.v.i 2, 0, v10, 0
; REMAT-NEXT: vse32.v v8, (a0)
+; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT: li a0, 27
; REMAT-NEXT: slli a0, a0, 9
; REMAT-NEXT: add a0, a1, a0
+; REMAT-NEXT: vse32.v v8, (a0)
; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
-; REMAT-NEXT: vse32.v v10, (a0)
; REMAT-NEXT: li a0, 7
; REMAT-NEXT: slli a0, a0, 11
; REMAT-NEXT: add a0, a1, a0
-; REMAT-NEXT: sf.vc.v.i 2, 0, v10, 0
; REMAT-NEXT: vse32.v v8, (a0)
+; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT: li a0, 29
; REMAT-NEXT: slli a0, a0, 9
; REMAT-NEXT: add a0, a1, a0
+; REMAT-NEXT: vse32.v v8, (a0)
; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
-; REMAT-NEXT: vse32.v v10, (a0)
; REMAT-NEXT: li a0, 15
; REMAT-NEXT: slli a0, a0, 10
; REMAT-NEXT: add a0, a1, a0
-; REMAT-NEXT: sf.vc.v.i 2, 0, v10, 0
; REMAT-NEXT: vse32.v v8, (a0)
+; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT: li a0, 31
; REMAT-NEXT: slli a0, a0, 9
; REMAT-NEXT: add a0, a1, a0
+; REMAT-NEXT: vse32.v v8, (a0)
; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
-; REMAT-NEXT: vse32.v v10, (a0)
; REMAT-NEXT: lui a0, 4
; REMAT-NEXT: add a0, a1, a0
-; REMAT-NEXT: sf.vc.v.i 2, 0, v10, 0
; REMAT-NEXT: vse32.v v8, (a0)
+; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT: lui a0, 4
; REMAT-NEXT: addiw a0, a0, 512
; REMAT-NEXT: add a0, a1, a0
+; REMAT-NEXT: vse32.v v8, (a0)
; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
-; REMAT-NEXT: vse32.v v10, (a0)
; REMAT-NEXT: li a0, 17
; REMAT-NEXT: slli a0, a0, 10
; REMAT-NEXT: add a0, a1, a0
-; REMAT-NEXT: sf.vc.v.i 2, 0, v10, 0
; REMAT-NEXT: vse32.v v8, (a0)
+; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT: lui a0, 4
; REMAT-NEXT: addiw a0, a0, 1536
; REMAT-NEXT: add a0, a1, a0
+; REMAT-NEXT: vse32.v v8, (a0)
; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
-; REMAT-NEXT: vse32.v v10, (a0)
; REMAT-NEXT: li a0, 9
; REMAT-NEXT: slli a0, a0, 11
; REMAT-NEXT: add a0, a1, a0
-; REMAT-NEXT: sf.vc.v.i 2, 0, v10, 0
; REMAT-NEXT: vse32.v v8, (a0)
+; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT: lui a0, 5
; REMAT-NEXT: addiw a0, a0, -1536
; REMAT-NEXT: add a0, a1, a0
+; REMAT-NEXT: vse32.v v8, (a0)
; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
-; REMAT-NEXT: vse32.v v10, (a0)
; REMAT-NEXT: li a0, 19
; REMAT-NEXT: slli a0, a0, 10
; REMAT-NEXT: add a0, a1, a0
-; REMAT-NEXT: sf.vc.v.i 2, 0, v10, 0
; REMAT-NEXT: vse32.v v8, (a0)
+; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT: add ra, a1, ra
+; REMAT-NEXT: vse32.v v8, (ra)
; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
-; REMAT-NEXT: vse32.v v10, (ra)
; REMAT-NEXT: add s11, a1, s11
-; REMAT-NEXT: sf.vc.v.i 2, 0, v10, 0
; REMAT-NEXT: vse32.v v8, (s11)
+; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT: add s10, a1, s10
+; REMAT-NEXT: vse32.v v8, (s10)
; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
-; REMAT-NEXT: vse32.v v10, (s10)
; REMAT-NEXT: add s9, a1, s9
-; REMAT-NEXT: sf.vc.v.i 2, 0, v10, 0
; REMAT-NEXT: vse32.v v8, (s9)
+; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT: add s8, a1, s8
+; REMAT-NEXT: vse32.v v8, (s8)
; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
-; REMAT-NEXT: vse32.v v10, (s8)
; REMAT-NEXT: add s7, a1, s7
-; REMAT-NEXT: sf.vc.v.i 2, 0, v10, 0
; REMAT-NEXT: vse32.v v8, (s7)
+; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT: add s6, a1, s6
+; REMAT-NEXT: vse32.v v8, (s6)
; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
-; REMAT-NEXT: vse32.v v10, (s6)
; REMAT-NEXT: add s5, a1, s5
-; REMAT-NEXT: sf.vc.v.i 2, 0, v10, 0
; REMAT-NEXT: vse32.v v8, (s5)
+; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT: add s4, a1, s4
+; REMAT-NEXT: vse32.v v8, (s4)
; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
-; REMAT-NEXT: vse32.v v10, (s4)
; REMAT-NEXT: add s3, a1, s3
-; REMAT-NEXT: sf.vc.v.i 2, 0, v10, 0
; REMAT-NEXT: vse32.v v8, (s3)
+; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT: add s2, a1, s2
+; REMAT-NEXT: vse32.v v8, (s2)
; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
-; REMAT-NEXT: vse32.v v10, (s2)
; REMAT-NEXT: add s1, a1, s1
-; REMAT-NEXT: sf.vc.v.i 2, 0, v10, 0
; REMAT-NEXT: vse32.v v8, (s1)
+; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT: add s0, a1, s0
+; REMAT-NEXT: vse32.v v8, (s0)
; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
-; REMAT-NEXT: vse32.v v10, (s0)
; REMAT-NEXT: add t6, a1, t6
-; REMAT-NEXT: sf.vc.v.i 2, 0, v10, 0
; REMAT-NEXT: vse32.v v8, (t6)
+; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT: add t5, a1, t5
+; REMAT-NEXT: vse32.v v8, (t5)
; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
-; REMAT-NEXT: vse32.v v10, (t5)
; REMAT-NEXT: add t4, a1, t4
-; REMAT-NEXT: sf.vc.v.i 2, 0, v10, 0
; REMAT-NEXT: vse32.v v8, (t4)
+; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT: add t3, a1, t3
+; REMAT-NEXT: vse32.v v8, (t3)
; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
-; REMAT-NEXT: vse32.v v10, (t3)
; REMAT-NEXT: add t2, a1, t2
-; REMAT-NEXT: sf.vc.v.i 2, 0, v10, 0
; REMAT-NEXT: vse32.v v8, (t2)
+; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT: add t1, a1, t1
+; REMAT-NEXT: vse32.v v8, (t1)
; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
-; REMAT-NEXT: vse32.v v10, (t1)
; REMAT-NEXT: add t0, a1, t0
-; REMAT-NEXT: sf.vc.v.i 2, 0, v10, 0
; REMAT-NEXT: vse32.v v8, (t0)
+; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT: add a7, a1, a7
+; REMAT-NEXT: vse32.v v8, (a7)
; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
-; REMAT-NEXT: vse32.v v10, (a7)
; REMAT-NEXT: add a6, a1, a6
-; REMAT-NEXT: sf.vc.v.i 2, 0, v10, 0
; REMAT-NEXT: vse32.v v8, (a6)
+; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT: add a5, a1, a5
+; REMAT-NEXT: vse32.v v8, (a5)
; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
-; REMAT-NEXT: vse32.v v10, (a5)
; REMAT-NEXT: add a4, a1, a4
-; REMAT-NEXT: sf.vc.v.i 2, 0, v10, 0
; REMAT-NEXT: vse32.v v8, (a4)
+; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT: add a3, a1, a3
+; REMAT-NEXT: vse32.v v8, (a3)
; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
-; REMAT-NEXT: vse32.v v10, (a3)
; REMAT-NEXT: add a2, a1, a2
; REMAT-NEXT: sf.vc.v.i 2, 0, v10, 0
; REMAT-NEXT: vse32.v v8, (a2)
@@ -1449,13 +1512,13 @@ define void @test(ptr %0, ptr %1, i64 %2) {
; REMAT-NEXT: vse32.v v10, (a0)
; REMAT-NEXT: lui a0, 10
; REMAT-NEXT: add a0, a1, a0
-; REMAT-NEXT: sf.vc.v.i 2, 0, v10, 0
; REMAT-NEXT: vse32.v v8, (a0)
; REMAT-NEXT: lui a0, 10
; REMAT-NEXT: addiw a0, a0, 512
; REMAT-NEXT: add a0, a1, a0
; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
-; REMAT-NEXT: vse32.v v10, (a0)
+; REMAT-NEXT: vse32.v v8, (a0)
+; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0
; REMAT-NEXT: ld ra, 104(sp) # 8-byte Folded Reload
; REMAT-NEXT: ld s0, 96(sp) # 8-byte Folded Reload