[llvm] 33d804c - [RISCV] Allow VCIX with SE to reorder (#77049)

via llvm-commits llvm-commits at lists.llvm.org
Tue Jan 23 19:30:16 PST 2024


Author: Brandon Wu
Date: 2024-01-24T11:30:12+08:00
New Revision: 33d804c6c2786cbbbc13743060f08d679941e0a4

URL: https://github.com/llvm/llvm-project/commit/33d804c6c2786cbbbc13743060f08d679941e0a4
DIFF: https://github.com/llvm/llvm-project/commit/33d804c6c2786cbbbc13743060f08d679941e0a4.diff

LOG: [RISCV] Allow VCIX with SE to reorder (#77049)

This patch allows VCIX instructions that have side effects to be
reordered with memory and other side-effecting instructions. However,
we don't want VCIX instructions to be reordered with each other, so we
propose a dummy register called VCIX_STATE and make these instructions
implicitly define and use it.

Added: 
    

Modified: 
    llvm/lib/Target/RISCV/RISCVISelLowering.cpp
    llvm/lib/Target/RISCV/RISCVISelLowering.h
    llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td
    llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
    llvm/lib/Target/RISCV/RISCVRegisterInfo.td
    llvm/test/CodeGen/RISCV/pr69586.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 47c6cd6e5487b80..56a5ab14a4a9f8d 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -8450,25 +8450,63 @@ static SDValue lowerGetVectorLength(SDNode *N, SelectionDAG &DAG,
   return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), Res);
 }
 
-static void getVCIXOperands(SDValue &Op, SelectionDAG &DAG,
-                            SmallVector<SDValue> &Ops) {
+static inline void promoteVCIXScalar(const SDValue &Op,
+                                     SmallVectorImpl<SDValue> &Operands,
+                                     SelectionDAG &DAG) {
+  const RISCVSubtarget &Subtarget =
+      DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
+
+  bool HasChain = Op.getOpcode() == ISD::INTRINSIC_VOID ||
+                  Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
+  unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
   SDLoc DL(Op);
 
+  const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
+      RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
+  if (!II || !II->hasScalarOperand())
+    return;
+
+  unsigned SplatOp = II->ScalarOperand + 1;
+  assert(SplatOp < Op.getNumOperands());
+
+  SDValue &ScalarOp = Operands[SplatOp];
+  MVT OpVT = ScalarOp.getSimpleValueType();
+  MVT XLenVT = Subtarget.getXLenVT();
+
+  // The code below is partially copied from lowerVectorIntrinsicScalars.
+  // If this isn't a scalar, or its type is XLenVT we're done.
+  if (!OpVT.isScalarInteger() || OpVT == XLenVT)
+    return;
+
+  // Manually emit promote operation for scalar operation.
+  if (OpVT.bitsLT(XLenVT)) {
+    unsigned ExtOpc =
+        isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
+    ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp);
+  }
+
+  return;
+}
+
+static void processVCIXOperands(SDValue &OrigOp,
+                                SmallVectorImpl<SDValue> &Operands,
+                                SelectionDAG &DAG) {
+  promoteVCIXScalar(OrigOp, Operands, DAG);
   const RISCVSubtarget &Subtarget =
       DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
-  for (const SDValue &V : Op->op_values()) {
+  for (SDValue &V : Operands) {
     EVT ValType = V.getValueType();
-    if (ValType.isScalableVector() && ValType.isFloatingPoint()) {
+    if (ValType.isVector() && ValType.isFloatingPoint()) {
       MVT InterimIVT =
           MVT::getVectorVT(MVT::getIntegerVT(ValType.getScalarSizeInBits()),
                            ValType.getVectorElementCount());
-      Ops.push_back(DAG.getBitcast(InterimIVT, V));
-    } else if (ValType.isFixedLengthVector()) {
+      V = DAG.getBitcast(InterimIVT, V);
+    }
+    if (ValType.isFixedLengthVector()) {
       MVT OpContainerVT = getContainerForFixedLengthVector(
           DAG, V.getSimpleValueType(), Subtarget);
-      Ops.push_back(convertToScalableVector(OpContainerVT, V, DAG, Subtarget));
-    } else
-      Ops.push_back(V);
+      V = convertToScalableVector(OpContainerVT, V, DAG, Subtarget);
+    }
   }
 }
 
@@ -8702,8 +8740,8 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
   case Intrinsic::riscv_sf_vc_v_fvw: {
     MVT VT = Op.getSimpleValueType();
 
-    SmallVector<SDValue> Ops;
-    getVCIXOperands(Op, DAG, Ops);
+    SmallVector<SDValue> Operands{Op->op_values()};
+    processVCIXOperands(Op, Operands, DAG);
 
     MVT RetVT = VT;
     if (VT.isFixedLengthVector())
@@ -8712,7 +8750,7 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
       RetVT = MVT::getVectorVT(MVT::getIntegerVT(VT.getScalarSizeInBits()),
                                VT.getVectorElementCount());
 
-    SDValue NewNode = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, RetVT, Ops);
+    SDValue NewNode = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, RetVT, Operands);
 
     if (VT.isFixedLengthVector())
       NewNode = convertFromScalableVector(VT, NewNode, DAG, Subtarget);
@@ -8729,6 +8767,52 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
   return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
 }
 
+static inline SDValue getVCIXISDNodeWCHAIN(SDValue &Op, SelectionDAG &DAG,
+                                           unsigned Type) {
+  SDLoc DL(Op);
+  SmallVector<SDValue> Operands{Op->op_values()};
+  Operands.erase(Operands.begin() + 1);
+
+  const RISCVSubtarget &Subtarget =
+      DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
+  MVT VT = Op.getSimpleValueType();
+  MVT RetVT = VT;
+  MVT FloatVT = VT;
+
+  if (VT.isFloatingPoint()) {
+    RetVT = MVT::getVectorVT(MVT::getIntegerVT(VT.getScalarSizeInBits()),
+                             VT.getVectorElementCount());
+    FloatVT = RetVT;
+  }
+  if (VT.isFixedLengthVector())
+    RetVT = getContainerForFixedLengthVector(DAG.getTargetLoweringInfo(), RetVT,
+                                             Subtarget);
+
+  processVCIXOperands(Op, Operands, DAG);
+
+  SDVTList VTs = DAG.getVTList({RetVT, MVT::Other});
+  SDValue NewNode = DAG.getNode(Type, DL, VTs, Operands);
+  SDValue Chain = NewNode.getValue(1);
+
+  if (VT.isFixedLengthVector())
+    NewNode = convertFromScalableVector(FloatVT, NewNode, DAG, Subtarget);
+  if (VT.isFloatingPoint())
+    NewNode = DAG.getBitcast(VT, NewNode);
+
+  NewNode = DAG.getMergeValues({NewNode, Chain}, DL);
+
+  return NewNode;
+}
+
+static inline SDValue getVCIXISDNodeVOID(SDValue &Op, SelectionDAG &DAG,
+                                         unsigned Type) {
+  SmallVector<SDValue> Operands{Op->op_values()};
+  Operands.erase(Operands.begin() + 1);
+  processVCIXOperands(Op, Operands, DAG);
+
+  return DAG.getNode(Type, SDLoc(Op), Op.getValueType(), Operands);
+}
+
 SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
                                                     SelectionDAG &DAG) const {
   unsigned IntNo = Op.getConstantOperandVal(1);
@@ -8846,48 +8930,33 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
     return DAG.getMergeValues(Results, DL);
   }
   case Intrinsic::riscv_sf_vc_v_x_se:
+    return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_X_SE);
   case Intrinsic::riscv_sf_vc_v_i_se:
+    return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_I_SE);
   case Intrinsic::riscv_sf_vc_v_xv_se:
+    return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_XV_SE);
   case Intrinsic::riscv_sf_vc_v_iv_se:
+    return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_IV_SE);
   case Intrinsic::riscv_sf_vc_v_vv_se:
+    return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_VV_SE);
   case Intrinsic::riscv_sf_vc_v_fv_se:
+    return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_FV_SE);
   case Intrinsic::riscv_sf_vc_v_xvv_se:
+    return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_XVV_SE);
   case Intrinsic::riscv_sf_vc_v_ivv_se:
+    return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_IVV_SE);
   case Intrinsic::riscv_sf_vc_v_vvv_se:
+    return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_VVV_SE);
   case Intrinsic::riscv_sf_vc_v_fvv_se:
+    return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_FVV_SE);
   case Intrinsic::riscv_sf_vc_v_xvw_se:
+    return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_XVW_SE);
   case Intrinsic::riscv_sf_vc_v_ivw_se:
+    return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_IVW_SE);
   case Intrinsic::riscv_sf_vc_v_vvw_se:
-  case Intrinsic::riscv_sf_vc_v_fvw_se: {
-    MVT VT = Op.getSimpleValueType();
-    SDLoc DL(Op);
-    SmallVector<SDValue> Ops;
-    getVCIXOperands(Op, DAG, Ops);
-
-    MVT RetVT = VT;
-    if (VT.isFixedLengthVector())
-      RetVT = getContainerForFixedLengthVector(VT);
-    else if (VT.isFloatingPoint())
-      RetVT = MVT::getVectorVT(MVT::getIntegerVT(RetVT.getScalarSizeInBits()),
-                               RetVT.getVectorElementCount());
-
-    SDVTList VTs = DAG.getVTList({RetVT, MVT::Other});
-    SDValue NewNode = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops);
-
-    if (VT.isFixedLengthVector()) {
-      SDValue FixedVector =
-          convertFromScalableVector(VT, NewNode, DAG, Subtarget);
-      NewNode = DAG.getMergeValues({FixedVector, NewNode.getValue(1)}, DL);
-    } else if (VT.isFloatingPoint()) {
-      SDValue BitCast = DAG.getBitcast(VT, NewNode.getValue(0));
-      NewNode = DAG.getMergeValues({BitCast, NewNode.getValue(1)}, DL);
-    }
-
-    if (Op == NewNode)
-      break;
-
-    return NewNode;
-  }
+    return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_VVW_SE);
+  case Intrinsic::riscv_sf_vc_v_fvw_se:
+    return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_FVW_SE);
   }
 
   return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
@@ -8977,72 +9046,117 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
         FixedIntrinsic->getMemoryVT(), FixedIntrinsic->getMemOperand());
   }
   case Intrinsic::riscv_sf_vc_x_se_e8mf8:
+    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_X_SE_E8MF8);
   case Intrinsic::riscv_sf_vc_x_se_e8mf4:
+    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_X_SE_E8MF4);
   case Intrinsic::riscv_sf_vc_x_se_e8mf2:
+    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_X_SE_E8MF2);
   case Intrinsic::riscv_sf_vc_x_se_e8m1:
+    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_X_SE_E8M1);
   case Intrinsic::riscv_sf_vc_x_se_e8m2:
+    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_X_SE_E8M2);
   case Intrinsic::riscv_sf_vc_x_se_e8m4:
+    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_X_SE_E8M4);
   case Intrinsic::riscv_sf_vc_x_se_e8m8:
+    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_X_SE_E8M8);
   case Intrinsic::riscv_sf_vc_x_se_e16mf4:
+    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_X_SE_E16MF4);
   case Intrinsic::riscv_sf_vc_x_se_e16mf2:
+    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_X_SE_E16MF2);
   case Intrinsic::riscv_sf_vc_x_se_e16m1:
+    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_X_SE_E16M1);
   case Intrinsic::riscv_sf_vc_x_se_e16m2:
+    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_X_SE_E16M2);
   case Intrinsic::riscv_sf_vc_x_se_e16m4:
+    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_X_SE_E16M4);
   case Intrinsic::riscv_sf_vc_x_se_e16m8:
+    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_X_SE_E16M8);
   case Intrinsic::riscv_sf_vc_x_se_e32mf2:
+    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_X_SE_E32MF2);
   case Intrinsic::riscv_sf_vc_x_se_e32m1:
+    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_X_SE_E32M1);
   case Intrinsic::riscv_sf_vc_x_se_e32m2:
+    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_X_SE_E32M2);
   case Intrinsic::riscv_sf_vc_x_se_e32m4:
+    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_X_SE_E32M4);
   case Intrinsic::riscv_sf_vc_x_se_e32m8:
+    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_X_SE_E32M8);
   case Intrinsic::riscv_sf_vc_x_se_e64m1:
+    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_X_SE_E64M1);
   case Intrinsic::riscv_sf_vc_x_se_e64m2:
+    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_X_SE_E64M2);
   case Intrinsic::riscv_sf_vc_x_se_e64m4:
+    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_X_SE_E64M4);
   case Intrinsic::riscv_sf_vc_x_se_e64m8:
+    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_X_SE_E64M8);
   case Intrinsic::riscv_sf_vc_i_se_e8mf8:
+    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_I_SE_E8MF8);
   case Intrinsic::riscv_sf_vc_i_se_e8mf4:
+    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_I_SE_E8MF4);
   case Intrinsic::riscv_sf_vc_i_se_e8mf2:
+    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_I_SE_E8MF2);
   case Intrinsic::riscv_sf_vc_i_se_e8m1:
+    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_I_SE_E8M1);
   case Intrinsic::riscv_sf_vc_i_se_e8m2:
+    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_I_SE_E8M2);
   case Intrinsic::riscv_sf_vc_i_se_e8m4:
+    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_I_SE_E8M4);
   case Intrinsic::riscv_sf_vc_i_se_e8m8:
+    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_I_SE_E8M8);
   case Intrinsic::riscv_sf_vc_i_se_e16mf4:
+    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_I_SE_E16MF4);
   case Intrinsic::riscv_sf_vc_i_se_e16mf2:
+    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_I_SE_E16MF2);
   case Intrinsic::riscv_sf_vc_i_se_e16m1:
+    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_I_SE_E16M1);
   case Intrinsic::riscv_sf_vc_i_se_e16m2:
+    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_I_SE_E16M2);
   case Intrinsic::riscv_sf_vc_i_se_e16m4:
+    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_I_SE_E16M4);
   case Intrinsic::riscv_sf_vc_i_se_e16m8:
+    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_I_SE_E16M8);
   case Intrinsic::riscv_sf_vc_i_se_e32mf2:
+    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_I_SE_E32MF2);
   case Intrinsic::riscv_sf_vc_i_se_e32m1:
+    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_I_SE_E32M1);
   case Intrinsic::riscv_sf_vc_i_se_e32m2:
+    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_I_SE_E32M2);
   case Intrinsic::riscv_sf_vc_i_se_e32m4:
+    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_I_SE_E32M4);
   case Intrinsic::riscv_sf_vc_i_se_e32m8:
+    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_I_SE_E32M8);
   case Intrinsic::riscv_sf_vc_i_se_e64m1:
+    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_I_SE_E64M1);
   case Intrinsic::riscv_sf_vc_i_se_e64m2:
+    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_I_SE_E64M2);
   case Intrinsic::riscv_sf_vc_i_se_e64m4:
+    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_I_SE_E64M4);
   case Intrinsic::riscv_sf_vc_i_se_e64m8:
+    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_I_SE_E64M8);
   case Intrinsic::riscv_sf_vc_xv_se:
+    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XV_SE);
   case Intrinsic::riscv_sf_vc_iv_se:
+    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_IV_SE);
   case Intrinsic::riscv_sf_vc_vv_se:
+    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_VV_SE);
   case Intrinsic::riscv_sf_vc_fv_se:
+    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_FV_SE);
   case Intrinsic::riscv_sf_vc_xvv_se:
+    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XVV_SE);
   case Intrinsic::riscv_sf_vc_ivv_se:
+    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_IVV_SE);
   case Intrinsic::riscv_sf_vc_vvv_se:
+    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_VVV_SE);
   case Intrinsic::riscv_sf_vc_fvv_se:
+    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_FVV_SE);
   case Intrinsic::riscv_sf_vc_xvw_se:
+    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XVW_SE);
   case Intrinsic::riscv_sf_vc_ivw_se:
+    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_IVW_SE);
   case Intrinsic::riscv_sf_vc_vvw_se:
-  case Intrinsic::riscv_sf_vc_fvw_se: {
-    SmallVector<SDValue> Ops;
-    getVCIXOperands(Op, DAG, Ops);
-
-    SDValue NewNode =
-        DAG.getNode(ISD::INTRINSIC_VOID, SDLoc(Op), Op->getVTList(), Ops);
-
-    if (Op == NewNode)
-      break;
-
-    return NewNode;
-  }
+    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_VVW_SE);
+  case Intrinsic::riscv_sf_vc_fvw_se:
+    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_FVW_SE);
   }
 
   return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
@@ -19013,6 +19127,76 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
   NODE_NAME_CASE(SWAP_CSR)
   NODE_NAME_CASE(CZERO_EQZ)
   NODE_NAME_CASE(CZERO_NEZ)
+  NODE_NAME_CASE(SF_VC_X_SE_E8MF8)
+  NODE_NAME_CASE(SF_VC_X_SE_E8MF4)
+  NODE_NAME_CASE(SF_VC_X_SE_E8MF2)
+  NODE_NAME_CASE(SF_VC_X_SE_E8M1)
+  NODE_NAME_CASE(SF_VC_X_SE_E8M2)
+  NODE_NAME_CASE(SF_VC_X_SE_E8M4)
+  NODE_NAME_CASE(SF_VC_X_SE_E8M8)
+  NODE_NAME_CASE(SF_VC_X_SE_E16MF4)
+  NODE_NAME_CASE(SF_VC_X_SE_E16MF2)
+  NODE_NAME_CASE(SF_VC_X_SE_E16M1)
+  NODE_NAME_CASE(SF_VC_X_SE_E16M2)
+  NODE_NAME_CASE(SF_VC_X_SE_E16M4)
+  NODE_NAME_CASE(SF_VC_X_SE_E16M8)
+  NODE_NAME_CASE(SF_VC_X_SE_E32MF2)
+  NODE_NAME_CASE(SF_VC_X_SE_E32M1)
+  NODE_NAME_CASE(SF_VC_X_SE_E32M2)
+  NODE_NAME_CASE(SF_VC_X_SE_E32M4)
+  NODE_NAME_CASE(SF_VC_X_SE_E32M8)
+  NODE_NAME_CASE(SF_VC_X_SE_E64M1)
+  NODE_NAME_CASE(SF_VC_X_SE_E64M2)
+  NODE_NAME_CASE(SF_VC_X_SE_E64M4)
+  NODE_NAME_CASE(SF_VC_X_SE_E64M8)
+  NODE_NAME_CASE(SF_VC_I_SE_E8MF8)
+  NODE_NAME_CASE(SF_VC_I_SE_E8MF4)
+  NODE_NAME_CASE(SF_VC_I_SE_E8MF2)
+  NODE_NAME_CASE(SF_VC_I_SE_E8M1)
+  NODE_NAME_CASE(SF_VC_I_SE_E8M2)
+  NODE_NAME_CASE(SF_VC_I_SE_E8M4)
+  NODE_NAME_CASE(SF_VC_I_SE_E8M8)
+  NODE_NAME_CASE(SF_VC_I_SE_E16MF4)
+  NODE_NAME_CASE(SF_VC_I_SE_E16MF2)
+  NODE_NAME_CASE(SF_VC_I_SE_E16M1)
+  NODE_NAME_CASE(SF_VC_I_SE_E16M2)
+  NODE_NAME_CASE(SF_VC_I_SE_E16M4)
+  NODE_NAME_CASE(SF_VC_I_SE_E16M8)
+  NODE_NAME_CASE(SF_VC_I_SE_E32MF2)
+  NODE_NAME_CASE(SF_VC_I_SE_E32M1)
+  NODE_NAME_CASE(SF_VC_I_SE_E32M2)
+  NODE_NAME_CASE(SF_VC_I_SE_E32M4)
+  NODE_NAME_CASE(SF_VC_I_SE_E32M8)
+  NODE_NAME_CASE(SF_VC_I_SE_E64M1)
+  NODE_NAME_CASE(SF_VC_I_SE_E64M2)
+  NODE_NAME_CASE(SF_VC_I_SE_E64M4)
+  NODE_NAME_CASE(SF_VC_I_SE_E64M8)
+  NODE_NAME_CASE(SF_VC_XV_SE)
+  NODE_NAME_CASE(SF_VC_IV_SE)
+  NODE_NAME_CASE(SF_VC_VV_SE)
+  NODE_NAME_CASE(SF_VC_FV_SE)
+  NODE_NAME_CASE(SF_VC_XVV_SE)
+  NODE_NAME_CASE(SF_VC_IVV_SE)
+  NODE_NAME_CASE(SF_VC_VVV_SE)
+  NODE_NAME_CASE(SF_VC_FVV_SE)
+  NODE_NAME_CASE(SF_VC_XVW_SE)
+  NODE_NAME_CASE(SF_VC_IVW_SE)
+  NODE_NAME_CASE(SF_VC_VVW_SE)
+  NODE_NAME_CASE(SF_VC_FVW_SE)
+  NODE_NAME_CASE(SF_VC_V_X_SE)
+  NODE_NAME_CASE(SF_VC_V_I_SE)
+  NODE_NAME_CASE(SF_VC_V_XV_SE)
+  NODE_NAME_CASE(SF_VC_V_IV_SE)
+  NODE_NAME_CASE(SF_VC_V_VV_SE)
+  NODE_NAME_CASE(SF_VC_V_FV_SE)
+  NODE_NAME_CASE(SF_VC_V_XVV_SE)
+  NODE_NAME_CASE(SF_VC_V_IVV_SE)
+  NODE_NAME_CASE(SF_VC_V_VVV_SE)
+  NODE_NAME_CASE(SF_VC_V_FVV_SE)
+  NODE_NAME_CASE(SF_VC_V_XVW_SE)
+  NODE_NAME_CASE(SF_VC_V_IVW_SE)
+  NODE_NAME_CASE(SF_VC_V_VVW_SE)
+  NODE_NAME_CASE(SF_VC_V_FVW_SE)
   }
   // clang-format on
   return nullptr;

diff  --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index 30b9ad7e6f6f326..8d2fdbb9d40b4a7 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -415,6 +415,77 @@ enum NodeType : unsigned {
   STRICT_VFROUND_NOEXCEPT_VL,
   LAST_RISCV_STRICTFP_OPCODE = STRICT_VFROUND_NOEXCEPT_VL,
 
+  SF_VC_X_SE_E8MF8,
+  SF_VC_X_SE_E8MF4,
+  SF_VC_X_SE_E8MF2,
+  SF_VC_X_SE_E8M1,
+  SF_VC_X_SE_E8M2,
+  SF_VC_X_SE_E8M4,
+  SF_VC_X_SE_E8M8,
+  SF_VC_X_SE_E16MF4,
+  SF_VC_X_SE_E16MF2,
+  SF_VC_X_SE_E16M1,
+  SF_VC_X_SE_E16M2,
+  SF_VC_X_SE_E16M4,
+  SF_VC_X_SE_E16M8,
+  SF_VC_X_SE_E32MF2,
+  SF_VC_X_SE_E32M1,
+  SF_VC_X_SE_E32M2,
+  SF_VC_X_SE_E32M4,
+  SF_VC_X_SE_E32M8,
+  SF_VC_X_SE_E64M1,
+  SF_VC_X_SE_E64M2,
+  SF_VC_X_SE_E64M4,
+  SF_VC_X_SE_E64M8,
+  SF_VC_I_SE_E8MF8,
+  SF_VC_I_SE_E8MF4,
+  SF_VC_I_SE_E8MF2,
+  SF_VC_I_SE_E8M1,
+  SF_VC_I_SE_E8M2,
+  SF_VC_I_SE_E8M4,
+  SF_VC_I_SE_E8M8,
+  SF_VC_I_SE_E16MF4,
+  SF_VC_I_SE_E16MF2,
+  SF_VC_I_SE_E16M1,
+  SF_VC_I_SE_E16M2,
+  SF_VC_I_SE_E16M4,
+  SF_VC_I_SE_E16M8,
+  SF_VC_I_SE_E32MF2,
+  SF_VC_I_SE_E32M1,
+  SF_VC_I_SE_E32M2,
+  SF_VC_I_SE_E32M4,
+  SF_VC_I_SE_E32M8,
+  SF_VC_I_SE_E64M1,
+  SF_VC_I_SE_E64M2,
+  SF_VC_I_SE_E64M4,
+  SF_VC_I_SE_E64M8,
+  SF_VC_XV_SE,
+  SF_VC_IV_SE,
+  SF_VC_VV_SE,
+  SF_VC_FV_SE,
+  SF_VC_XVV_SE,
+  SF_VC_IVV_SE,
+  SF_VC_VVV_SE,
+  SF_VC_FVV_SE,
+  SF_VC_XVW_SE,
+  SF_VC_IVW_SE,
+  SF_VC_VVW_SE,
+  SF_VC_FVW_SE,
+  SF_VC_V_X_SE,
+  SF_VC_V_I_SE,
+  SF_VC_V_XV_SE,
+  SF_VC_V_IV_SE,
+  SF_VC_V_VV_SE,
+  SF_VC_V_FV_SE,
+  SF_VC_V_XVV_SE,
+  SF_VC_V_IVV_SE,
+  SF_VC_V_VVV_SE,
+  SF_VC_V_FVV_SE,
+  SF_VC_V_XVW_SE,
+  SF_VC_V_IVW_SE,
+  SF_VC_V_VVW_SE,
+  SF_VC_V_FVW_SE,
+
   // WARNING: Do not add anything in the end unless you want the node to
   // have memop! In fact, starting from FIRST_TARGET_MEMORY_OPCODE all
   // opcodes will be thought as target memory ops!

diff  --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td
index 31f832dfd84cee7..d22f98d693b1bf1 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td
@@ -221,8 +221,8 @@ let Predicates = [HasVendorXSfvfnrclipxfqf], DecoderNamespace = "XSfvfnrclipxfqf
   def VFNRCLIP_XU_F_QF : CustomSiFiveVFNRCLIP<0b100010, OPFVF, "sf.vfnrclip.xu.f.qf">;
   def VFNRCLIP_X_F_QF : CustomSiFiveVFNRCLIP<0b100011, OPFVF, "sf.vfnrclip.x.f.qf">;
 }
-class VPseudoVC_X<Operand OpClass, DAGOperand RS1Class,
-                  bit HasSideEffect = 1> :
+
+class VPseudoVC_X<Operand OpClass, DAGOperand RS1Class> :
       Pseudo<(outs),
              (ins OpClass:$op1, payload5:$rs2, payload5:$rd, RS1Class:$r1,
                   AVL:$vl, ixlenimm:$sew), []>,
@@ -231,12 +231,11 @@ class VPseudoVC_X<Operand OpClass, DAGOperand RS1Class,
   let mayStore = 0;
   let HasVLOp = 1;
   let HasSEWOp = 1;
-  let hasSideEffects = HasSideEffect;
+  let hasSideEffects = 0;
   let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
 }
 
-class VPseudoVC_XV<Operand OpClass, VReg RS2Class, DAGOperand RS1Class,
-                   bit HasSideEffect = 1> :
+class VPseudoVC_XV<Operand OpClass, VReg RS2Class, DAGOperand RS1Class> :
       Pseudo<(outs),
              (ins OpClass:$op1, payload5:$rd, RS2Class:$rs2, RS1Class:$r1,
                   AVL:$vl, ixlenimm:$sew), []>,
@@ -245,12 +244,12 @@ class VPseudoVC_XV<Operand OpClass, VReg RS2Class, DAGOperand RS1Class,
   let mayStore = 0;
   let HasVLOp = 1;
   let HasSEWOp = 1;
-  let hasSideEffects = HasSideEffect;
+  let hasSideEffects = 0;
   let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
 }
 
 class VPseudoVC_XVV<Operand OpClass, VReg RDClass, VReg RS2Class,
-                    DAGOperand RS1Class, bit HasSideEffect = 1> :
+                    DAGOperand RS1Class> :
       Pseudo<(outs),
              (ins OpClass:$op1, RDClass:$rd, RS2Class:$rs2, RS1Class:$r1,
                   AVL:$vl, ixlenimm:$sew), []>,
@@ -259,12 +258,11 @@ class VPseudoVC_XVV<Operand OpClass, VReg RDClass, VReg RS2Class,
   let mayStore = 0;
   let HasVLOp = 1;
   let HasSEWOp = 1;
-  let hasSideEffects = HasSideEffect;
+  let hasSideEffects = 0;
   let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
 }
 
-class VPseudoVC_V_X<Operand OpClass, VReg RDClass, DAGOperand RS1Class,
-                    bit HasSideEffect = 1> :
+class VPseudoVC_V_X<Operand OpClass, VReg RDClass, DAGOperand RS1Class> :
       Pseudo<(outs RDClass:$rd),
              (ins OpClass:$op1, payload5:$rs2, RS1Class:$r1,
                   AVL:$vl, ixlenimm:$sew), []>,
@@ -273,12 +271,12 @@ class VPseudoVC_V_X<Operand OpClass, VReg RDClass, DAGOperand RS1Class,
   let mayStore = 0;
   let HasVLOp = 1;
   let HasSEWOp = 1;
-  let hasSideEffects = HasSideEffect;
+  let hasSideEffects = 0;
   let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
 }
 
 class VPseudoVC_V_XV<Operand OpClass, VReg RDClass, VReg RS2Class,
-                     DAGOperand RS1Class, bit HasSideEffect = 1> :
+                     DAGOperand RS1Class> :
       Pseudo<(outs RDClass:$rd),
              (ins OpClass:$op1, RS2Class:$rs2, RS1Class:$r1,
                   AVL:$vl, ixlenimm:$sew), []>,
@@ -287,12 +285,12 @@ class VPseudoVC_V_XV<Operand OpClass, VReg RDClass, VReg RS2Class,
   let mayStore = 0;
   let HasVLOp = 1;
   let HasSEWOp = 1;
-  let hasSideEffects = HasSideEffect;
+  let hasSideEffects = 0;
   let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
 }
 
 class VPseudoVC_V_XVV<Operand OpClass, VReg RDClass, VReg RS2Class,
-                      DAGOperand RS1Class, bit HasSideEffect = 1> :
+                      DAGOperand RS1Class> :
       Pseudo<(outs RDClass:$rd),
              (ins OpClass:$op1, RDClass:$rs3, RS2Class:$rs2, RS1Class:$r1,
                   AVL:$vl, ixlenimm:$sew), []>,
@@ -301,44 +299,52 @@ class VPseudoVC_V_XVV<Operand OpClass, VReg RDClass, VReg RS2Class,
   let mayStore = 0;
   let HasVLOp = 1;
   let HasSEWOp = 1;
-  let hasSideEffects = HasSideEffect;
+  let hasSideEffects = 0;
   let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
 }
 
 multiclass VPseudoVC_X<LMULInfo m, DAGOperand RS1Class,
                        Operand OpClass = payload2> {
   let VLMul = m.value in {
-    def "PseudoVC_" # NAME # "_SE_" # m.MX : VPseudoVC_X<OpClass, RS1Class>;
-    def "PseudoVC_V_" # NAME # "_SE_" # m.MX : VPseudoVC_V_X<OpClass, m.vrclass, RS1Class>;
-    def "PseudoVC_V_" # NAME # "_" # m.MX : VPseudoVC_V_X<OpClass, m.vrclass, RS1Class, 0>;
+    let Defs = [VCIX_STATE], Uses = [VCIX_STATE] in {
+      def "PseudoVC_" # NAME # "_SE_" # m.MX : VPseudoVC_X<OpClass, RS1Class>;
+      def "PseudoVC_V_" # NAME # "_SE_" # m.MX : VPseudoVC_V_X<OpClass, m.vrclass, RS1Class>;
+    }
+    def "PseudoVC_V_" # NAME # "_" # m.MX : VPseudoVC_V_X<OpClass, m.vrclass, RS1Class>;
   }
 }
 
 multiclass VPseudoVC_XV<LMULInfo m, DAGOperand RS1Class,
                         Operand OpClass = payload2> {
   let VLMul = m.value in {
-    def "PseudoVC_" # NAME # "_SE_" # m.MX : VPseudoVC_XV<OpClass, m.vrclass, RS1Class>;
-    def "PseudoVC_V_" # NAME # "_SE_" # m.MX : VPseudoVC_V_XV<OpClass, m.vrclass, m.vrclass, RS1Class>;
-    def "PseudoVC_V_" # NAME # "_" # m.MX : VPseudoVC_V_XV<OpClass, m.vrclass, m.vrclass, RS1Class, 0>;
+    let Defs = [VCIX_STATE], Uses = [VCIX_STATE] in {
+      def "PseudoVC_" # NAME # "_SE_" # m.MX : VPseudoVC_XV<OpClass, m.vrclass, RS1Class>;
+      def "PseudoVC_V_" # NAME # "_SE_" # m.MX : VPseudoVC_V_XV<OpClass, m.vrclass, m.vrclass, RS1Class>;
+    }
+    def "PseudoVC_V_" # NAME # "_" # m.MX : VPseudoVC_V_XV<OpClass, m.vrclass, m.vrclass, RS1Class>;
   }
 }
 
 multiclass VPseudoVC_XVV<LMULInfo m, DAGOperand RS1Class,
                          Operand OpClass = payload2> {
   let VLMul = m.value in {
-    def "PseudoVC_" # NAME # "_SE_" # m.MX : VPseudoVC_XVV<OpClass, m.vrclass, m.vrclass, RS1Class>;
-    def "PseudoVC_V_" # NAME # "_SE_" # m.MX : VPseudoVC_V_XVV<OpClass, m.vrclass, m.vrclass, RS1Class>;
-    def "PseudoVC_V_" # NAME # "_" # m.MX : VPseudoVC_V_XVV<OpClass, m.vrclass, m.vrclass, RS1Class, 0>;
+    let Defs = [VCIX_STATE], Uses = [VCIX_STATE] in {
+      def "PseudoVC_" # NAME # "_SE_" # m.MX : VPseudoVC_XVV<OpClass, m.vrclass, m.vrclass, RS1Class>;
+      def "PseudoVC_V_" # NAME # "_SE_" # m.MX : VPseudoVC_V_XVV<OpClass, m.vrclass, m.vrclass, RS1Class>;
+    }
+    def "PseudoVC_V_" # NAME # "_" # m.MX : VPseudoVC_V_XVV<OpClass, m.vrclass, m.vrclass, RS1Class>;
   }
 }
 
 multiclass VPseudoVC_XVW<LMULInfo m, DAGOperand RS1Class,
                          Operand OpClass = payload2> {
   let VLMul = m.value in {
+    let Defs = [VCIX_STATE], Uses = [VCIX_STATE] in
     def "PseudoVC_" # NAME # "_SE_" # m.MX : VPseudoVC_XVV<OpClass, m.wvrclass, m.vrclass, RS1Class>;
     let Constraints = "@earlyclobber $rd, $rd = $rs3" in {
+      let Defs = [VCIX_STATE], Uses = [VCIX_STATE] in
       def "PseudoVC_V_" # NAME # "_SE_" # m.MX : VPseudoVC_V_XVV<OpClass, m.wvrclass, m.vrclass, RS1Class>;
-      def "PseudoVC_V_" # NAME # "_" # m.MX : VPseudoVC_V_XVV<OpClass, m.wvrclass, m.vrclass, RS1Class, 0>;
+      def "PseudoVC_V_" # NAME # "_" # m.MX : VPseudoVC_V_XVV<OpClass, m.wvrclass, m.vrclass, RS1Class>;
     }
   }
 }
@@ -428,6 +434,149 @@ let Predicates = [HasVendorXSfvfnrclipxfqf] in {
   defm VFNRCLIP_X_F_QF : VPseudoSiFiveVFNRCLIP;
 }
 
+// SDNode
+def SDT_SF_VC_X : SDTypeProfile<0, 5, [SDTCisSameAs<0, 1>,
+                                       SDTCisVT<0, XLenVT>,
+                                       SDTCisSameAs<0, 2>,
+                                       SDTCisSameAs<0, 3>,
+                                       SDTCisSameAs<0, 4>]>;
+
+def SDT_SF_VC_V_X : SDTypeProfile<1, 4, [SDTCisVec<0>,
+                                         SDTCisVT<1, XLenVT>,
+                                         SDTCisSameAs<1, 2>,
+                                         SDTCisSameAs<1, 3>,
+                                         SDTCisSameAs<1, 4>]>;
+
+def SDT_SF_VC_XV : SDTypeProfile<0, 5, [SDTCisSameAs<0, 1>,
+                                        SDTCisVec<2>,
+                                        SDTCisSameAs<0, 4>,
+                                        SDTCisVT<0, XLenVT>]>;
+
+def SDT_SF_VC_V_XV : SDTypeProfile<1, 4, [SDTCisVec<0>,
+                                          SDTCisVT<1, XLenVT>,
+                                          SDTCisSameAs<0, 2>,
+                                          SDTCisSameAs<1, 4>]>;
+
+def SDT_SF_VC_XVV : SDTypeProfile<0, 5, [SDTCisVT<0, XLenVT>,
+                                         SDTCisVec<1>,
+                                         SDTCisSameAs<1, 2>,
+                                         SDTCisSameAs<0, 4>]>;
+
+def SDT_SF_VC_V_XVV : SDTypeProfile<1, 5, [SDTCisVec<0>,
+                                           SDTCisVT<1, XLenVT>,
+                                           SDTCisSameAs<0, 2>,
+                                           SDTCisSameAs<0, 3>,
+                                           SDTCisSameAs<1, 5>]>;
+
+def SDT_SF_VC_XVW : SDTypeProfile<0, 5, [SDTCisVT<0, XLenVT>,
+                                         SDTCisVec<1>, SDTCisVec<2>,
+                                         SDTCisSameAs<0, 4>]>;
+
+def SDT_SF_VC_V_XVW : SDTypeProfile<1, 5, [SDTCisVec<0>,
+                                           SDTCisVT<1, XLenVT>,
+                                           SDTCisSameAs<0, 2>,
+                                           SDTCisVec<3>,
+                                           SDTCisSameAs<1, 5>]>;
+
+foreach vti = AllIntegerVectors in {
+  def sf_vc_x_e#vti.SEW#!tolower(vti.LMul.MX) : SDNode<"RISCVISD::SF_VC_X_SE_E"#vti.SEW#vti.LMul.MX, SDT_SF_VC_X, [SDNPHasChain]>;
+  def sf_vc_i_e#vti.SEW#!tolower(vti.LMul.MX) : SDNode<"RISCVISD::SF_VC_I_SE_E"#vti.SEW#vti.LMul.MX, SDT_SF_VC_X, [SDNPHasChain]>;
+}
+def sf_vc_v_x_se : SDNode<"RISCVISD::SF_VC_V_X_SE", SDT_SF_VC_V_X, [SDNPHasChain]>;
+def sf_vc_v_i_se : SDNode<"RISCVISD::SF_VC_V_I_SE", SDT_SF_VC_V_X, [SDNPHasChain]>;
+def sf_vc_vv_se : SDNode<"RISCVISD::SF_VC_VV_SE", SDT_SF_VC_XV, [SDNPHasChain]>;
+def sf_vc_xv_se : SDNode<"RISCVISD::SF_VC_XV_SE", SDT_SF_VC_XV, [SDNPHasChain]>;
+def sf_vc_iv_se : SDNode<"RISCVISD::SF_VC_IV_SE", SDT_SF_VC_XV, [SDNPHasChain]>;
+def sf_vc_fv_se : SDNode<"RISCVISD::SF_VC_FV_SE", SDT_SF_VC_XV, [SDNPHasChain]>;
+def sf_vc_v_vv_se : SDNode<"RISCVISD::SF_VC_V_VV_SE", SDT_SF_VC_V_XV, [SDNPHasChain]>;
+def sf_vc_v_xv_se : SDNode<"RISCVISD::SF_VC_V_XV_SE", SDT_SF_VC_V_XV, [SDNPHasChain]>;
+def sf_vc_v_iv_se : SDNode<"RISCVISD::SF_VC_V_IV_SE", SDT_SF_VC_V_XV, [SDNPHasChain]>;
+def sf_vc_v_fv_se : SDNode<"RISCVISD::SF_VC_V_FV_SE", SDT_SF_VC_V_XV, [SDNPHasChain]>;
+def sf_vc_vvv_se : SDNode<"RISCVISD::SF_VC_VVV_SE", SDT_SF_VC_XVV, [SDNPHasChain]>;
+def sf_vc_xvv_se : SDNode<"RISCVISD::SF_VC_XVV_SE", SDT_SF_VC_XVV, [SDNPHasChain]>;
+def sf_vc_ivv_se : SDNode<"RISCVISD::SF_VC_IVV_SE", SDT_SF_VC_XVV, [SDNPHasChain]>;
+def sf_vc_fvv_se : SDNode<"RISCVISD::SF_VC_FVV_SE", SDT_SF_VC_XVV, [SDNPHasChain]>;
+def sf_vc_v_vvv_se : SDNode<"RISCVISD::SF_VC_V_VVV_SE", SDT_SF_VC_V_XVV, [SDNPHasChain]>;
+def sf_vc_v_xvv_se : SDNode<"RISCVISD::SF_VC_V_XVV_SE", SDT_SF_VC_V_XVV, [SDNPHasChain]>;
+def sf_vc_v_ivv_se : SDNode<"RISCVISD::SF_VC_V_IVV_SE", SDT_SF_VC_V_XVV, [SDNPHasChain]>;
+def sf_vc_v_fvv_se : SDNode<"RISCVISD::SF_VC_V_FVV_SE", SDT_SF_VC_V_XVV, [SDNPHasChain]>;
+def sf_vc_vvw_se : SDNode<"RISCVISD::SF_VC_VVW_SE", SDT_SF_VC_XVW, [SDNPHasChain]>;
+def sf_vc_xvw_se : SDNode<"RISCVISD::SF_VC_XVW_SE", SDT_SF_VC_XVW, [SDNPHasChain]>;
+def sf_vc_ivw_se : SDNode<"RISCVISD::SF_VC_IVW_SE", SDT_SF_VC_XVW, [SDNPHasChain]>;
+def sf_vc_fvw_se : SDNode<"RISCVISD::SF_VC_FVW_SE", SDT_SF_VC_XVW, [SDNPHasChain]>;
+def sf_vc_v_vvw_se : SDNode<"RISCVISD::SF_VC_V_VVW_SE", SDT_SF_VC_V_XVW, [SDNPHasChain]>;
+def sf_vc_v_xvw_se : SDNode<"RISCVISD::SF_VC_V_XVW_SE", SDT_SF_VC_V_XVW, [SDNPHasChain]>;
+def sf_vc_v_ivw_se : SDNode<"RISCVISD::SF_VC_V_IVW_SE", SDT_SF_VC_V_XVW, [SDNPHasChain]>;
+def sf_vc_v_fvw_se : SDNode<"RISCVISD::SF_VC_V_FVW_SE", SDT_SF_VC_V_XVW, [SDNPHasChain]>;
+
+class VPatVC_OP4_ISD<SDPatternOperator op,
+                     string inst,
+                     ValueType op2_type,
+                     ValueType op3_type,
+                     ValueType op4_type,
+                     int sew,
+                     DAGOperand op2_kind,
+                     DAGOperand op3_kind,
+                     DAGOperand op4_kind,
+                     Operand op1_kind = payload2> :
+  Pat<(op
+       (XLenVT   op1_kind:$op1),
+       (op2_type op2_kind:$op2),
+       (op3_type op3_kind:$op3),
+       (op4_type op4_kind:$op4),
+       VLOpFrag),
+      (!cast<Instruction>(inst)
+       (XLenVT   op1_kind:$op1),
+       (op2_type op2_kind:$op2),
+       (op3_type op3_kind:$op3),
+       (op4_type op4_kind:$op4),
+       GPR:$vl, sew)>;
+
+class VPatVC_V_OP4_ISD<SDPatternOperator op,
+                       string inst,
+                       ValueType result_type,
+                       ValueType op2_type,
+                       ValueType op3_type,
+                       ValueType op4_type,
+                       int sew,
+                       DAGOperand op2_kind,
+                       DAGOperand op3_kind,
+                       DAGOperand op4_kind,
+                       Operand op1_kind = payload2> :
+  Pat<(result_type (op
+                    (XLenVT   op1_kind:$op1),
+                    (op2_type op2_kind:$op2),
+                    (op3_type op3_kind:$op3),
+                    (op4_type op4_kind:$op4),
+                    VLOpFrag)),
+                   (!cast<Instruction>(inst)
+                    (XLenVT   op1_kind:$op1),
+                    (op2_type op2_kind:$op2),
+                    (op3_type op3_kind:$op3),
+                    (op4_type op4_kind:$op4),
+                    GPR:$vl, sew)>;
+
+
+class VPatVC_V_OP3_ISD<SDPatternOperator op,
+                       string inst,
+                       ValueType result_type,
+                       ValueType op2_type,
+                       ValueType op3_type,
+                       int sew,
+                       DAGOperand op2_kind,
+                       DAGOperand op3_kind,
+                       Operand op1_kind = payload2> :
+  Pat<(result_type (op
+                    (XLenVT   op1_kind:$op1),
+                    (op2_type op2_kind:$op2),
+                    (op3_type op3_kind:$op3),
+                    VLOpFrag)),
+                   (!cast<Instruction>(inst)
+                    (XLenVT   op1_kind:$op1),
+                    (op2_type op2_kind:$op2),
+                    (op3_type op3_kind:$op3),
+                    GPR:$vl, sew)>;
+
 class VPatVC_OP4<string intrinsic_name,
                  string inst,
                  ValueType op2_type,
@@ -497,14 +646,14 @@ class VPatVC_V_OP3<string intrinsic_name,
 
 multiclass VPatVC_X<string intrinsic_suffix, string instruction_suffix,
                     VTypeInfo vti, ValueType type, DAGOperand kind> {
-  def : VPatVC_OP4<"int_riscv_sf_vc_" # intrinsic_suffix # "_se_e" # vti.SEW # !tolower(vti.LMul.MX),
-                   "PseudoVC_" # instruction_suffix # "_SE_" # vti.LMul.MX,
-                   XLenVT, XLenVT, type, vti.Log2SEW,
-                   payload5, payload5, kind>;
-  def : VPatVC_V_OP3<"int_riscv_sf_vc_v_" # intrinsic_suffix # "_se",
-                     "PseudoVC_V_" # instruction_suffix # "_SE_" # vti.LMul.MX,
-                     vti.Vector, XLenVT, type, vti.Log2SEW,
-                     payload5, kind>;
+  def : VPatVC_OP4_ISD<!cast<SDPatternOperator>("sf_vc_" # intrinsic_suffix # "_e" # vti.SEW # !tolower(vti.LMul.MX)),
+                       "PseudoVC_" # instruction_suffix # "_SE_" # vti.LMul.MX,
+                       XLenVT, XLenVT, type, vti.Log2SEW,
+                       payload5, payload5, kind>;
+  def : VPatVC_V_OP3_ISD<!cast<SDPatternOperator>("sf_vc_v_" # intrinsic_suffix # "_se"),
+                         "PseudoVC_V_" # instruction_suffix # "_SE_" # vti.LMul.MX,
+                         vti.Vector, XLenVT, type, vti.Log2SEW,
+                         payload5, kind>;
   def : VPatVC_V_OP3<"int_riscv_sf_vc_v_" # intrinsic_suffix,
                      "PseudoVC_V_" # instruction_suffix # "_" # vti.LMul.MX,
                      vti.Vector, XLenVT, type, vti.Log2SEW,
@@ -514,14 +663,14 @@ multiclass VPatVC_X<string intrinsic_suffix, string instruction_suffix,
 multiclass VPatVC_XV<string intrinsic_suffix, string instruction_suffix,
                      VTypeInfo vti, ValueType type, DAGOperand kind,
                      Operand op1_kind = payload2> {
-  def : VPatVC_OP4<"int_riscv_sf_vc_" # intrinsic_suffix # "_se",
+  def : VPatVC_OP4_ISD<!cast<SDPatternOperator>("sf_vc_" # intrinsic_suffix # "_se"),
                    "PseudoVC_" # instruction_suffix # "_SE_" # vti.LMul.MX,
                    XLenVT, vti.Vector, type, vti.Log2SEW,
                    payload5, vti.RegClass, kind, op1_kind>;
-  def : VPatVC_V_OP3<"int_riscv_sf_vc_v_" # intrinsic_suffix # "_se",
-                     "PseudoVC_V_" # instruction_suffix # "_SE_" # vti.LMul.MX,
-                     vti.Vector, vti.Vector, type, vti.Log2SEW,
-                     vti.RegClass, kind, op1_kind>;
+  def : VPatVC_V_OP3_ISD<!cast<SDPatternOperator>("sf_vc_v_" # intrinsic_suffix # "_se"),
+                         "PseudoVC_V_" # instruction_suffix # "_SE_" # vti.LMul.MX,
+                         vti.Vector, vti.Vector, type, vti.Log2SEW,
+                         vti.RegClass, kind, op1_kind>;
   def : VPatVC_V_OP3<"int_riscv_sf_vc_v_" # intrinsic_suffix,
                      "PseudoVC_V_" # instruction_suffix # "_" # vti.LMul.MX,
                      vti.Vector, vti.Vector, type, vti.Log2SEW,
@@ -531,11 +680,11 @@ multiclass VPatVC_XV<string intrinsic_suffix, string instruction_suffix,
 multiclass VPatVC_XVV<string intrinsic_suffix, string instruction_suffix,
                       VTypeInfo wti, VTypeInfo vti, ValueType type, DAGOperand kind,
                       Operand op1_kind = payload2> {
-  def : VPatVC_OP4<"int_riscv_sf_vc_" # intrinsic_suffix # "_se",
+  def : VPatVC_OP4_ISD<!cast<SDPatternOperator>("sf_vc_" # intrinsic_suffix # "_se"),
                    "PseudoVC_" # instruction_suffix # "_SE_" # vti.LMul.MX,
                    wti.Vector, vti.Vector, type, vti.Log2SEW,
                    wti.RegClass, vti.RegClass, kind, op1_kind>;
-  def : VPatVC_V_OP4<"int_riscv_sf_vc_v_" # intrinsic_suffix # "_se",
+  def : VPatVC_V_OP4_ISD<!cast<SDPatternOperator>("sf_vc_v_" # intrinsic_suffix # "_se"),
                      "PseudoVC_V_" # instruction_suffix # "_SE_" # vti.LMul.MX,
                      wti.Vector, wti.Vector, vti.Vector, type, vti.Log2SEW,
                      wti.RegClass, vti.RegClass, kind, op1_kind>;

diff  --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
index d7bb46a221dd269..30457f528853b8b 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
@@ -130,6 +130,9 @@ BitVector RISCVRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
   markSuperRegs(Reserved, RISCV::FRM);
   markSuperRegs(Reserved, RISCV::FFLAGS);
 
+  // SiFive VCIX state registers.
+  markSuperRegs(Reserved, RISCV::VCIX_STATE);
+
   if (MF.getFunction().getCallingConv() == CallingConv::GRAAL) {
     if (Subtarget.isRVE())
       report_fatal_error("Graal reserved registers do not exist in RVE");

diff  --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
index 5a4d8c4cfece7ff..193b85e28186072 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
@@ -607,3 +607,6 @@ def FRM    : RISCVReg<0, "frm">;
 
 // Shadow Stack register
 def SSP    : RISCVReg<0, "ssp">;
+
+// Dummy VCIX state register
+def VCIX_STATE : RISCVReg<0, "vcix_state">;

diff  --git a/llvm/test/CodeGen/RISCV/pr69586.ll b/llvm/test/CodeGen/RISCV/pr69586.ll
index ef91334c5ff0044..2d5fce2ca4970ef 100644
--- a/llvm/test/CodeGen/RISCV/pr69586.ll
+++ b/llvm/test/CodeGen/RISCV/pr69586.ll
@@ -7,21 +7,21 @@
 define void @test(ptr %0, ptr %1, i64 %2) {
 ; NOREMAT-LABEL: test:
 ; NOREMAT:       # %bb.0:
-; NOREMAT-NEXT:    addi sp, sp, -368
-; NOREMAT-NEXT:    .cfi_def_cfa_offset 368
-; NOREMAT-NEXT:    sd ra, 360(sp) # 8-byte Folded Spill
-; NOREMAT-NEXT:    sd s0, 352(sp) # 8-byte Folded Spill
-; NOREMAT-NEXT:    sd s1, 344(sp) # 8-byte Folded Spill
-; NOREMAT-NEXT:    sd s2, 336(sp) # 8-byte Folded Spill
-; NOREMAT-NEXT:    sd s3, 328(sp) # 8-byte Folded Spill
-; NOREMAT-NEXT:    sd s4, 320(sp) # 8-byte Folded Spill
-; NOREMAT-NEXT:    sd s5, 312(sp) # 8-byte Folded Spill
-; NOREMAT-NEXT:    sd s6, 304(sp) # 8-byte Folded Spill
-; NOREMAT-NEXT:    sd s7, 296(sp) # 8-byte Folded Spill
-; NOREMAT-NEXT:    sd s8, 288(sp) # 8-byte Folded Spill
-; NOREMAT-NEXT:    sd s9, 280(sp) # 8-byte Folded Spill
-; NOREMAT-NEXT:    sd s10, 272(sp) # 8-byte Folded Spill
-; NOREMAT-NEXT:    sd s11, 264(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    addi sp, sp, -400
+; NOREMAT-NEXT:    .cfi_def_cfa_offset 400
+; NOREMAT-NEXT:    sd ra, 392(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    sd s0, 384(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    sd s1, 376(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    sd s2, 368(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    sd s3, 360(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    sd s4, 352(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    sd s5, 344(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    sd s6, 336(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    sd s7, 328(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    sd s8, 320(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    sd s9, 312(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    sd s10, 304(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    sd s11, 296(sp) # 8-byte Folded Spill
 ; NOREMAT-NEXT:    .cfi_offset ra, -8
 ; NOREMAT-NEXT:    .cfi_offset s0, -16
 ; NOREMAT-NEXT:    .cfi_offset s1, -24
@@ -35,6 +35,11 @@ define void @test(ptr %0, ptr %1, i64 %2) {
 ; NOREMAT-NEXT:    .cfi_offset s9, -88
 ; NOREMAT-NEXT:    .cfi_offset s10, -96
 ; NOREMAT-NEXT:    .cfi_offset s11, -104
+; NOREMAT-NEXT:    csrr a2, vlenb
+; NOREMAT-NEXT:    li a3, 6
+; NOREMAT-NEXT:    mul a2, a2, a3
+; NOREMAT-NEXT:    sub sp, sp, a2
+; NOREMAT-NEXT:    .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x90, 0x03, 0x22, 0x11, 0x06, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 400 + 6 * vlenb
 ; NOREMAT-NEXT:    li a2, 32
 ; NOREMAT-NEXT:    vsetvli zero, a2, e32, m2, ta, ma
 ; NOREMAT-NEXT:    vle32.v v8, (a0)
@@ -50,670 +55,728 @@ define void @test(ptr %0, ptr %1, i64 %2) {
 ; NOREMAT-NEXT:    vle32.v v10, (a2)
 ; NOREMAT-NEXT:    li a2, 1
 ; NOREMAT-NEXT:    slli a2, a2, 11
-; NOREMAT-NEXT:    sd a2, 256(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    sd a2, 272(sp) # 8-byte Folded Spill
 ; NOREMAT-NEXT:    add a2, a0, a2
 ; NOREMAT-NEXT:    vle32.v v12, (a2)
 ; NOREMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
 ; NOREMAT-NEXT:    vle32.v v8, (a2)
-; NOREMAT-NEXT:    li a4, 5
-; NOREMAT-NEXT:    slli a2, a4, 9
-; NOREMAT-NEXT:    sd a2, 248(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    li a5, 5
+; NOREMAT-NEXT:    slli a2, a5, 9
+; NOREMAT-NEXT:    sd a2, 264(sp) # 8-byte Folded Spill
 ; NOREMAT-NEXT:    add a2, a0, a2
 ; NOREMAT-NEXT:    vle32.v v14, (a2)
 ; NOREMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
 ; NOREMAT-NEXT:    vle32.v v10, (a2)
-; NOREMAT-NEXT:    li a5, 3
-; NOREMAT-NEXT:    slli a2, a5, 10
-; NOREMAT-NEXT:    sd a2, 240(sp) # 8-byte Folded Spill
-; NOREMAT-NEXT:    add a2, a0, a2
-; NOREMAT-NEXT:    vle32.v v12, (a2)
+; NOREMAT-NEXT:    li a2, 3
+; NOREMAT-NEXT:    slli a3, a2, 10
+; NOREMAT-NEXT:    sd a3, 256(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    add a3, a0, a3
+; NOREMAT-NEXT:    vle32.v v12, (a3)
 ; NOREMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
-; NOREMAT-NEXT:    vle32.v v8, (a2)
-; NOREMAT-NEXT:    li a3, 7
-; NOREMAT-NEXT:    slli a2, a3, 9
-; NOREMAT-NEXT:    sd a2, 232(sp) # 8-byte Folded Spill
-; NOREMAT-NEXT:    add a2, a0, a2
-; NOREMAT-NEXT:    vle32.v v14, (a2)
+; NOREMAT-NEXT:    vle32.v v8, (a3)
+; NOREMAT-NEXT:    li a4, 7
+; NOREMAT-NEXT:    slli a3, a4, 9
+; NOREMAT-NEXT:    sd a3, 248(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    add a3, a0, a3
+; NOREMAT-NEXT:    vle32.v v14, (a3)
 ; NOREMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
-; NOREMAT-NEXT:    vle32.v v10, (a2)
-; NOREMAT-NEXT:    lui a2, 1
-; NOREMAT-NEXT:    add a2, a0, a2
-; NOREMAT-NEXT:    vle32.v v12, (a2)
+; NOREMAT-NEXT:    vle32.v v10, (a3)
+; NOREMAT-NEXT:    lui a3, 1
+; NOREMAT-NEXT:    add a3, a0, a3
+; NOREMAT-NEXT:    vle32.v v12, (a3)
 ; NOREMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
-; NOREMAT-NEXT:    vle32.v v8, (a2)
-; NOREMAT-NEXT:    li a2, 9
-; NOREMAT-NEXT:    slli a6, a2, 9
-; NOREMAT-NEXT:    sd a6, 224(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    vle32.v v8, (a3)
+; NOREMAT-NEXT:    li a3, 9
+; NOREMAT-NEXT:    slli a6, a3, 9
+; NOREMAT-NEXT:    sd a6, 240(sp) # 8-byte Folded Spill
 ; NOREMAT-NEXT:    add a6, a0, a6
 ; NOREMAT-NEXT:    vle32.v v14, (a6)
 ; NOREMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
 ; NOREMAT-NEXT:    vle32.v v10, (a6)
-; NOREMAT-NEXT:    slli a6, a4, 10
-; NOREMAT-NEXT:    sd a6, 216(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    slli a6, a5, 10
+; NOREMAT-NEXT:    sd a6, 232(sp) # 8-byte Folded Spill
 ; NOREMAT-NEXT:    add a6, a0, a6
 ; NOREMAT-NEXT:    vle32.v v12, (a6)
 ; NOREMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
 ; NOREMAT-NEXT:    vle32.v v8, (a6)
 ; NOREMAT-NEXT:    li s8, 11
 ; NOREMAT-NEXT:    slli a6, s8, 9
-; NOREMAT-NEXT:    sd a6, 208(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    sd a6, 224(sp) # 8-byte Folded Spill
 ; NOREMAT-NEXT:    add a6, a0, a6
 ; NOREMAT-NEXT:    vle32.v v14, (a6)
 ; NOREMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
 ; NOREMAT-NEXT:    vle32.v v10, (a6)
-; NOREMAT-NEXT:    slli a5, a5, 11
-; NOREMAT-NEXT:    sd a5, 200(sp) # 8-byte Folded Spill
-; NOREMAT-NEXT:    add a5, a0, a5
-; NOREMAT-NEXT:    vle32.v v12, (a5)
+; NOREMAT-NEXT:    slli a2, a2, 11
+; NOREMAT-NEXT:    sd a2, 216(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    add a2, a0, a2
+; NOREMAT-NEXT:    vle32.v v12, (a2)
 ; NOREMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
-; NOREMAT-NEXT:    vle32.v v8, (a5)
+; NOREMAT-NEXT:    vle32.v v8, (a2)
 ; NOREMAT-NEXT:    li s2, 13
-; NOREMAT-NEXT:    slli a5, s2, 9
-; NOREMAT-NEXT:    sd a5, 192(sp) # 8-byte Folded Spill
-; NOREMAT-NEXT:    add a5, a0, a5
-; NOREMAT-NEXT:    vle32.v v14, (a5)
+; NOREMAT-NEXT:    slli a2, s2, 9
+; NOREMAT-NEXT:    sd a2, 208(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    add a2, a0, a2
+; NOREMAT-NEXT:    vle32.v v14, (a2)
 ; NOREMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
-; NOREMAT-NEXT:    vle32.v v10, (a5)
-; NOREMAT-NEXT:    slli a5, a3, 10
-; NOREMAT-NEXT:    sd a5, 184(sp) # 8-byte Folded Spill
-; NOREMAT-NEXT:    add a5, a0, a5
-; NOREMAT-NEXT:    vle32.v v12, (a5)
+; NOREMAT-NEXT:    vle32.v v10, (a2)
+; NOREMAT-NEXT:    slli a2, a4, 10
+; NOREMAT-NEXT:    sd a2, 200(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    add a2, a0, a2
+; NOREMAT-NEXT:    vle32.v v12, (a2)
 ; NOREMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
-; NOREMAT-NEXT:    vle32.v v8, (a5)
-; NOREMAT-NEXT:    li t0, 15
-; NOREMAT-NEXT:    slli a5, t0, 9
-; NOREMAT-NEXT:    sd a5, 176(sp) # 8-byte Folded Spill
-; NOREMAT-NEXT:    add a5, a0, a5
-; NOREMAT-NEXT:    vle32.v v14, (a5)
+; NOREMAT-NEXT:    vle32.v v8, (a2)
+; NOREMAT-NEXT:    li a2, 15
+; NOREMAT-NEXT:    slli a6, a2, 9
+; NOREMAT-NEXT:    sd a6, 192(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    add a6, a0, a6
+; NOREMAT-NEXT:    vle32.v v26, (a6)
 ; NOREMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
-; NOREMAT-NEXT:    vle32.v v10, (a5)
-; NOREMAT-NEXT:    lui a5, 2
+; NOREMAT-NEXT:    vle32.v v16, (a6)
+; NOREMAT-NEXT:    lui a6, 2
+; NOREMAT-NEXT:    add a6, a0, a6
+; NOREMAT-NEXT:    vle32.v v28, (a6)
+; NOREMAT-NEXT:    vle32.v v10, (a6)
+; NOREMAT-NEXT:    li a6, 17
+; NOREMAT-NEXT:    slli a6, a6, 9
+; NOREMAT-NEXT:    sd a6, 184(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    li t0, 17
+; NOREMAT-NEXT:    add a6, a0, a6
+; NOREMAT-NEXT:    vle32.v v30, (a6)
+; NOREMAT-NEXT:    vle32.v v18, (a6)
+; NOREMAT-NEXT:    slli a6, a3, 10
+; NOREMAT-NEXT:    sd a6, 176(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    add a6, a0, a6
+; NOREMAT-NEXT:    vle32.v v0, (a6)
+; NOREMAT-NEXT:    vle32.v v20, (a6)
+; NOREMAT-NEXT:    li a6, 19
+; NOREMAT-NEXT:    slli a6, a6, 9
+; NOREMAT-NEXT:    sd a6, 168(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    li a7, 19
+; NOREMAT-NEXT:    add a6, a0, a6
+; NOREMAT-NEXT:    vle32.v v2, (a6)
+; NOREMAT-NEXT:    vle32.v v22, (a6)
+; NOREMAT-NEXT:    slli a5, a5, 11
+; NOREMAT-NEXT:    sd a5, 160(sp) # 8-byte Folded Spill
 ; NOREMAT-NEXT:    add a5, a0, a5
+; NOREMAT-NEXT:    vle32.v v4, (a5)
 ; NOREMAT-NEXT:    vle32.v v12, (a5)
-; NOREMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
-; NOREMAT-NEXT:    vle32.v v8, (a5)
-; NOREMAT-NEXT:    li a5, 17
-; NOREMAT-NEXT:    slli a5, a5, 9
-; NOREMAT-NEXT:    sd a5, 168(sp) # 8-byte Folded Spill
-; NOREMAT-NEXT:    li a7, 17
+; NOREMAT-NEXT:    li s10, 21
+; NOREMAT-NEXT:    slli a5, s10, 9
+; NOREMAT-NEXT:    sd a5, 152(sp) # 8-byte Folded Spill
 ; NOREMAT-NEXT:    add a5, a0, a5
+; NOREMAT-NEXT:    vle32.v v24, (a5)
 ; NOREMAT-NEXT:    vle32.v v14, (a5)
-; NOREMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
-; NOREMAT-NEXT:    vle32.v v10, (a5)
-; NOREMAT-NEXT:    slli a5, a2, 10
-; NOREMAT-NEXT:    sd a5, 160(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v8, v26
+; NOREMAT-NEXT:    slli a5, s8, 10
+; NOREMAT-NEXT:    sd a5, 144(sp) # 8-byte Folded Spill
 ; NOREMAT-NEXT:    add a5, a0, a5
-; NOREMAT-NEXT:    vle32.v v12, (a5)
-; NOREMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
+; NOREMAT-NEXT:    vle32.v v26, (a5)
 ; NOREMAT-NEXT:    vle32.v v8, (a5)
-; NOREMAT-NEXT:    li a5, 19
-; NOREMAT-NEXT:    slli a5, a5, 9
-; NOREMAT-NEXT:    sd a5, 152(sp) # 8-byte Folded Spill
-; NOREMAT-NEXT:    li a6, 19
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v16, v28
+; NOREMAT-NEXT:    li s6, 23
+; NOREMAT-NEXT:    slli a5, s6, 9
+; NOREMAT-NEXT:    sd a5, 136(sp) # 8-byte Folded Spill
 ; NOREMAT-NEXT:    add a5, a0, a5
-; NOREMAT-NEXT:    vle32.v v14, (a5)
-; NOREMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
+; NOREMAT-NEXT:    vle32.v v28, (a5)
+; NOREMAT-NEXT:    vle32.v v16, (a5)
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v10, v30
+; NOREMAT-NEXT:    lui a5, 3
+; NOREMAT-NEXT:    add a5, a0, a5
+; NOREMAT-NEXT:    vle32.v v30, (a5)
 ; NOREMAT-NEXT:    vle32.v v10, (a5)
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v18, v0
+; NOREMAT-NEXT:    li s3, 25
+; NOREMAT-NEXT:    slli a5, s3, 9
+; NOREMAT-NEXT:    sd a5, 128(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    add a5, a0, a5
+; NOREMAT-NEXT:    vle32.v v0, (a5)
+; NOREMAT-NEXT:    vle32.v v18, (a5)
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v20, v2
+; NOREMAT-NEXT:    slli a5, s2, 10
+; NOREMAT-NEXT:    sd a5, 120(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    add a5, a0, a5
+; NOREMAT-NEXT:    vle32.v v2, (a5)
+; NOREMAT-NEXT:    vle32.v v20, (a5)
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v22, v4
+; NOREMAT-NEXT:    li t5, 27
+; NOREMAT-NEXT:    slli a5, t5, 9
+; NOREMAT-NEXT:    sd a5, 112(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    add a5, a0, a5
+; NOREMAT-NEXT:    vle32.v v4, (a5)
+; NOREMAT-NEXT:    vle32.v v22, (a5)
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v12, v24
 ; NOREMAT-NEXT:    slli a4, a4, 11
-; NOREMAT-NEXT:    sd a4, 144(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    sd a4, 104(sp) # 8-byte Folded Spill
 ; NOREMAT-NEXT:    add a4, a0, a4
+; NOREMAT-NEXT:    vle32.v v24, (a4)
 ; NOREMAT-NEXT:    vle32.v v12, (a4)
-; NOREMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
-; NOREMAT-NEXT:    vle32.v v8, (a4)
-; NOREMAT-NEXT:    li s10, 21
-; NOREMAT-NEXT:    slli a4, s10, 9
-; NOREMAT-NEXT:    sd a4, 136(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v14, v26
+; NOREMAT-NEXT:    li t2, 29
+; NOREMAT-NEXT:    slli a4, t2, 9
+; NOREMAT-NEXT:    sd a4, 96(sp) # 8-byte Folded Spill
 ; NOREMAT-NEXT:    add a4, a0, a4
+; NOREMAT-NEXT:    vle32.v v26, (a4)
 ; NOREMAT-NEXT:    vle32.v v14, (a4)
-; NOREMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
-; NOREMAT-NEXT:    vle32.v v10, (a4)
-; NOREMAT-NEXT:    slli a4, s8, 10
-; NOREMAT-NEXT:    sd a4, 128(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v8, v28
+; NOREMAT-NEXT:    slli a4, a2, 10
+; NOREMAT-NEXT:    sd a4, 88(sp) # 8-byte Folded Spill
 ; NOREMAT-NEXT:    add a4, a0, a4
-; NOREMAT-NEXT:    vle32.v v12, (a4)
-; NOREMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
+; NOREMAT-NEXT:    vle32.v v28, (a4)
 ; NOREMAT-NEXT:    vle32.v v8, (a4)
-; NOREMAT-NEXT:    li s6, 23
-; NOREMAT-NEXT:    slli a4, s6, 9
-; NOREMAT-NEXT:    sd a4, 120(sp) # 8-byte Folded Spill
-; NOREMAT-NEXT:    add a4, a0, a4
-; NOREMAT-NEXT:    vle32.v v14, (a4)
-; NOREMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
-; NOREMAT-NEXT:    vle32.v v10, (a4)
-; NOREMAT-NEXT:    lui a4, 3
+; NOREMAT-NEXT:    csrr a4, vlenb
+; NOREMAT-NEXT:    slli a4, a4, 2
+; NOREMAT-NEXT:    add a4, sp, a4
+; NOREMAT-NEXT:    addi a4, a4, 288
+; NOREMAT-NEXT:    vs2r.v v8, (a4) # Unknown-size Folded Spill
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v16, v30
+; NOREMAT-NEXT:    li a5, 31
+; NOREMAT-NEXT:    slli a4, a5, 9
+; NOREMAT-NEXT:    sd a4, 80(sp) # 8-byte Folded Spill
 ; NOREMAT-NEXT:    add a4, a0, a4
-; NOREMAT-NEXT:    vle32.v v12, (a4)
-; NOREMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
+; NOREMAT-NEXT:    vle32.v v30, (a4)
+; NOREMAT-NEXT:    vle32.v v16, (a4)
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v10, v0
+; NOREMAT-NEXT:    lui a6, 4
+; NOREMAT-NEXT:    add a4, a0, a6
+; NOREMAT-NEXT:    vle32.v v0, (a4)
 ; NOREMAT-NEXT:    vle32.v v8, (a4)
-; NOREMAT-NEXT:    li s3, 25
-; NOREMAT-NEXT:    slli a4, s3, 9
-; NOREMAT-NEXT:    sd a4, 112(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    csrr a4, vlenb
+; NOREMAT-NEXT:    slli a4, a4, 1
+; NOREMAT-NEXT:    add a4, sp, a4
+; NOREMAT-NEXT:    addi a4, a4, 288
+; NOREMAT-NEXT:    vs2r.v v8, (a4) # Unknown-size Folded Spill
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v18, v2
+; NOREMAT-NEXT:    addiw a4, a6, 512
+; NOREMAT-NEXT:    sd a4, 72(sp) # 8-byte Folded Spill
 ; NOREMAT-NEXT:    add a4, a0, a4
-; NOREMAT-NEXT:    vle32.v v14, (a4)
-; NOREMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
-; NOREMAT-NEXT:    vle32.v v10, (a4)
-; NOREMAT-NEXT:    slli a4, s2, 10
-; NOREMAT-NEXT:    sd a4, 104(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    vle32.v v2, (a4)
+; NOREMAT-NEXT:    vle32.v v18, (a4)
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v20, v4
+; NOREMAT-NEXT:    slli a4, t0, 10
+; NOREMAT-NEXT:    sd a4, 64(sp) # 8-byte Folded Spill
 ; NOREMAT-NEXT:    add a4, a0, a4
-; NOREMAT-NEXT:    vle32.v v12, (a4)
-; NOREMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
-; NOREMAT-NEXT:    vle32.v v8, (a4)
-; NOREMAT-NEXT:    li t5, 27
-; NOREMAT-NEXT:    slli a4, t5, 9
-; NOREMAT-NEXT:    sd a4, 96(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    vle32.v v4, (a4)
+; NOREMAT-NEXT:    vle32.v v20, (a4)
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v22, v24
+; NOREMAT-NEXT:    addiw a4, a6, 1536
+; NOREMAT-NEXT:    sd a4, 56(sp) # 8-byte Folded Spill
 ; NOREMAT-NEXT:    add a4, a0, a4
-; NOREMAT-NEXT:    vle32.v v14, (a4)
-; NOREMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
-; NOREMAT-NEXT:    vle32.v v10, (a4)
+; NOREMAT-NEXT:    vle32.v v6, (a4)
+; NOREMAT-NEXT:    vle32.v v22, (a4)
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v12, v26
 ; NOREMAT-NEXT:    slli a3, a3, 11
-; NOREMAT-NEXT:    sd a3, 88(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    sd a3, 48(sp) # 8-byte Folded Spill
 ; NOREMAT-NEXT:    add a3, a0, a3
 ; NOREMAT-NEXT:    vle32.v v12, (a3)
-; NOREMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
 ; NOREMAT-NEXT:    vle32.v v8, (a3)
-; NOREMAT-NEXT:    li t2, 29
-; NOREMAT-NEXT:    slli a3, t2, 9
-; NOREMAT-NEXT:    sd a3, 80(sp) # 8-byte Folded Spill
-; NOREMAT-NEXT:    add a3, a0, a3
-; NOREMAT-NEXT:    vle32.v v14, (a3)
-; NOREMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
-; NOREMAT-NEXT:    vle32.v v10, (a3)
-; NOREMAT-NEXT:    slli a3, t0, 10
-; NOREMAT-NEXT:    sd a3, 72(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    addi a3, sp, 288
+; NOREMAT-NEXT:    vs2r.v v8, (a3) # Unknown-size Folded Spill
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v14, v28
+; NOREMAT-NEXT:    lui s1, 5
+; NOREMAT-NEXT:    addiw a3, s1, -1536
+; NOREMAT-NEXT:    sd a3, 40(sp) # 8-byte Folded Spill
 ; NOREMAT-NEXT:    add a3, a0, a3
-; NOREMAT-NEXT:    vle32.v v12, (a3)
-; NOREMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
 ; NOREMAT-NEXT:    vle32.v v8, (a3)
-; NOREMAT-NEXT:    li a5, 31
-; NOREMAT-NEXT:    slli a3, a5, 9
-; NOREMAT-NEXT:    sd a3, 64(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    vle32.v v24, (a3)
+; NOREMAT-NEXT:    csrr a3, vlenb
+; NOREMAT-NEXT:    slli a3, a3, 2
+; NOREMAT-NEXT:    add a3, sp, a3
+; NOREMAT-NEXT:    addi a3, a3, 288
+; NOREMAT-NEXT:    vl2r.v v10, (a3) # Unknown-size Folded Reload
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v10, v30
+; NOREMAT-NEXT:    slli a3, a7, 10
+; NOREMAT-NEXT:    sd a3, 32(sp) # 8-byte Folded Spill
 ; NOREMAT-NEXT:    add a3, a0, a3
-; NOREMAT-NEXT:    vle32.v v14, (a3)
-; NOREMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
 ; NOREMAT-NEXT:    vle32.v v10, (a3)
-; NOREMAT-NEXT:    lui a4, 4
-; NOREMAT-NEXT:    add a3, a0, a4
-; NOREMAT-NEXT:    vle32.v v12, (a3)
-; NOREMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
-; NOREMAT-NEXT:    vle32.v v8, (a3)
-; NOREMAT-NEXT:    addiw a3, a4, 512
-; NOREMAT-NEXT:    sd a3, 56(sp) # 8-byte Folded Spill
-; NOREMAT-NEXT:    add a3, a0, a3
 ; NOREMAT-NEXT:    vle32.v v14, (a3)
-; NOREMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
-; NOREMAT-NEXT:    vle32.v v10, (a3)
-; NOREMAT-NEXT:    slli a3, a7, 10
-; NOREMAT-NEXT:    sd a3, 48(sp) # 8-byte Folded Spill
-; NOREMAT-NEXT:    add a3, a0, a3
-; NOREMAT-NEXT:    vle32.v v12, (a3)
-; NOREMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
-; NOREMAT-NEXT:    vle32.v v8, (a3)
-; NOREMAT-NEXT:    addiw a3, a4, 1536
-; NOREMAT-NEXT:    sd a3, 40(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v16, v0
+; NOREMAT-NEXT:    addiw a3, s1, -512
+; NOREMAT-NEXT:    sd a3, 24(sp) # 8-byte Folded Spill
 ; NOREMAT-NEXT:    add a3, a0, a3
-; NOREMAT-NEXT:    vle32.v v14, (a3)
-; NOREMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
-; NOREMAT-NEXT:    vle32.v v10, (a3)
-; NOREMAT-NEXT:    slli a2, a2, 11
-; NOREMAT-NEXT:    sd a2, 32(sp) # 8-byte Folded Spill
-; NOREMAT-NEXT:    add a2, a0, a2
-; NOREMAT-NEXT:    vle32.v v12, (a2)
-; NOREMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
-; NOREMAT-NEXT:    vle32.v v8, (a2)
-; NOREMAT-NEXT:    lui s1, 5
-; NOREMAT-NEXT:    addiw a2, s1, -1536
-; NOREMAT-NEXT:    sd a2, 24(sp) # 8-byte Folded Spill
-; NOREMAT-NEXT:    add a2, a0, a2
-; NOREMAT-NEXT:    vle32.v v14, (a2)
-; NOREMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
-; NOREMAT-NEXT:    vle32.v v10, (a2)
-; NOREMAT-NEXT:    slli a2, a6, 10
-; NOREMAT-NEXT:    sd a2, 16(sp) # 8-byte Folded Spill
-; NOREMAT-NEXT:    add a2, a0, a2
-; NOREMAT-NEXT:    vle32.v v12, (a2)
-; NOREMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
-; NOREMAT-NEXT:    vle32.v v8, (a2)
-; NOREMAT-NEXT:    addiw a2, s1, -512
-; NOREMAT-NEXT:    sd a2, 8(sp) # 8-byte Folded Spill
-; NOREMAT-NEXT:    add a2, a0, a2
-; NOREMAT-NEXT:    vle32.v v14, (a2)
-; NOREMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
-; NOREMAT-NEXT:    vle32.v v10, (a2)
-; NOREMAT-NEXT:    add a2, a0, s1
-; NOREMAT-NEXT:    vle32.v v12, (a2)
-; NOREMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
-; NOREMAT-NEXT:    vle32.v v8, (a2)
+; NOREMAT-NEXT:    vle32.v v0, (a3)
+; NOREMAT-NEXT:    vle32.v v16, (a3)
+; NOREMAT-NEXT:    csrr a3, vlenb
+; NOREMAT-NEXT:    slli a3, a3, 1
+; NOREMAT-NEXT:    add a3, sp, a3
+; NOREMAT-NEXT:    addi a3, a3, 288
+; NOREMAT-NEXT:    vl2r.v v26, (a3) # Unknown-size Folded Reload
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v26, v2
+; NOREMAT-NEXT:    add a3, a0, s1
+; NOREMAT-NEXT:    vle32.v v26, (a3)
+; NOREMAT-NEXT:    vle32.v v28, (a3)
+; NOREMAT-NEXT:    csrr a3, vlenb
+; NOREMAT-NEXT:    slli a3, a3, 2
+; NOREMAT-NEXT:    add a3, sp, a3
+; NOREMAT-NEXT:    addi a3, a3, 288
+; NOREMAT-NEXT:    vs2r.v v28, (a3) # Unknown-size Folded Spill
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v18, v4
 ; NOREMAT-NEXT:    addiw ra, s1, 512
-; NOREMAT-NEXT:    add a2, a0, ra
-; NOREMAT-NEXT:    vle32.v v14, (a2)
-; NOREMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
-; NOREMAT-NEXT:    vle32.v v10, (a2)
+; NOREMAT-NEXT:    add a3, a0, ra
+; NOREMAT-NEXT:    vle32.v v28, (a3)
+; NOREMAT-NEXT:    vle32.v v30, (a3)
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v20, v6
 ; NOREMAT-NEXT:    slli s11, s10, 10
-; NOREMAT-NEXT:    add a2, a0, s11
-; NOREMAT-NEXT:    vle32.v v12, (a2)
-; NOREMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
-; NOREMAT-NEXT:    vle32.v v8, (a2)
+; NOREMAT-NEXT:    add a3, a0, s11
+; NOREMAT-NEXT:    vle32.v v2, (a3)
+; NOREMAT-NEXT:    vle32.v v18, (a3)
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v22, v12
 ; NOREMAT-NEXT:    addiw s10, s1, 1536
-; NOREMAT-NEXT:    add a2, a0, s10
-; NOREMAT-NEXT:    vle32.v v14, (a2)
-; NOREMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
-; NOREMAT-NEXT:    vle32.v v10, (a2)
+; NOREMAT-NEXT:    add a3, a0, s10
+; NOREMAT-NEXT:    vle32.v v4, (a3)
+; NOREMAT-NEXT:    vle32.v v20, (a3)
+; NOREMAT-NEXT:    addi a3, sp, 288
+; NOREMAT-NEXT:    vl2r.v v12, (a3) # Unknown-size Folded Reload
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v12, v8
 ; NOREMAT-NEXT:    slli s9, s8, 11
-; NOREMAT-NEXT:    add a2, a0, s9
-; NOREMAT-NEXT:    vle32.v v12, (a2)
-; NOREMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
-; NOREMAT-NEXT:    vle32.v v8, (a2)
-; NOREMAT-NEXT:    lui t1, 6
-; NOREMAT-NEXT:    addiw s8, t1, -1536
-; NOREMAT-NEXT:    add a2, a0, s8
-; NOREMAT-NEXT:    vle32.v v14, (a2)
-; NOREMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
-; NOREMAT-NEXT:    vle32.v v10, (a2)
+; NOREMAT-NEXT:    add a3, a0, s9
+; NOREMAT-NEXT:    vle32.v v6, (a3)
+; NOREMAT-NEXT:    vle32.v v12, (a3)
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v24, v10
+; NOREMAT-NEXT:    lui t0, 6
+; NOREMAT-NEXT:    addiw s8, t0, -1536
+; NOREMAT-NEXT:    add a3, a0, s8
+; NOREMAT-NEXT:    vle32.v v8, (a3)
+; NOREMAT-NEXT:    vle32.v v22, (a3)
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v14, v0
 ; NOREMAT-NEXT:    slli s7, s6, 10
-; NOREMAT-NEXT:    add a2, a0, s7
-; NOREMAT-NEXT:    vle32.v v12, (a2)
-; NOREMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
-; NOREMAT-NEXT:    vle32.v v8, (a2)
-; NOREMAT-NEXT:    addiw s6, t1, -512
-; NOREMAT-NEXT:    add a2, a0, s6
-; NOREMAT-NEXT:    vle32.v v14, (a2)
-; NOREMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
-; NOREMAT-NEXT:    vle32.v v10, (a2)
-; NOREMAT-NEXT:    add a2, a0, t1
-; NOREMAT-NEXT:    vle32.v v12, (a2)
-; NOREMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
-; NOREMAT-NEXT:    vle32.v v8, (a2)
-; NOREMAT-NEXT:    addiw s5, t1, 512
-; NOREMAT-NEXT:    add a2, a0, s5
-; NOREMAT-NEXT:    vle32.v v14, (a2)
-; NOREMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
-; NOREMAT-NEXT:    vle32.v v10, (a2)
+; NOREMAT-NEXT:    add a3, a0, s7
+; NOREMAT-NEXT:    vle32.v v10, (a3)
+; NOREMAT-NEXT:    vle32.v v14, (a3)
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v16, v26
+; NOREMAT-NEXT:    addiw s6, t0, -512
+; NOREMAT-NEXT:    add a3, a0, s6
+; NOREMAT-NEXT:    vle32.v v0, (a3)
+; NOREMAT-NEXT:    vle32.v v16, (a3)
+; NOREMAT-NEXT:    csrr a3, vlenb
+; NOREMAT-NEXT:    slli a3, a3, 2
+; NOREMAT-NEXT:    add a3, sp, a3
+; NOREMAT-NEXT:    addi a3, a3, 288
+; NOREMAT-NEXT:    vl2r.v v24, (a3) # Unknown-size Folded Reload
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v24, v28
+; NOREMAT-NEXT:    add a3, a0, t0
+; NOREMAT-NEXT:    vle32.v v24, (a3)
+; NOREMAT-NEXT:    vle32.v v26, (a3)
+; NOREMAT-NEXT:    csrr a3, vlenb
+; NOREMAT-NEXT:    slli a3, a3, 2
+; NOREMAT-NEXT:    add a3, sp, a3
+; NOREMAT-NEXT:    addi a3, a3, 288
+; NOREMAT-NEXT:    vs2r.v v26, (a3) # Unknown-size Folded Spill
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v30, v2
+; NOREMAT-NEXT:    addiw s5, t0, 512
+; NOREMAT-NEXT:    add a3, a0, s5
+; NOREMAT-NEXT:    vle32.v v26, (a3)
+; NOREMAT-NEXT:    vle32.v v28, (a3)
+; NOREMAT-NEXT:    csrr a3, vlenb
+; NOREMAT-NEXT:    slli a3, a3, 1
+; NOREMAT-NEXT:    add a3, sp, a3
+; NOREMAT-NEXT:    addi a3, a3, 288
+; NOREMAT-NEXT:    vs2r.v v28, (a3) # Unknown-size Folded Spill
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v18, v4
 ; NOREMAT-NEXT:    slli s4, s3, 10
-; NOREMAT-NEXT:    add a2, a0, s4
-; NOREMAT-NEXT:    vle32.v v12, (a2)
-; NOREMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
-; NOREMAT-NEXT:    vle32.v v8, (a2)
-; NOREMAT-NEXT:    addiw s3, t1, 1536
-; NOREMAT-NEXT:    add a2, a0, s3
-; NOREMAT-NEXT:    vle32.v v14, (a2)
-; NOREMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
-; NOREMAT-NEXT:    vle32.v v10, (a2)
+; NOREMAT-NEXT:    add a3, a0, s4
+; NOREMAT-NEXT:    vle32.v v28, (a3)
+; NOREMAT-NEXT:    vle32.v v18, (a3)
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v20, v6
+; NOREMAT-NEXT:    addiw s3, t0, 1536
+; NOREMAT-NEXT:    add a3, a0, s3
+; NOREMAT-NEXT:    vle32.v v30, (a3)
+; NOREMAT-NEXT:    vle32.v v20, (a3)
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v12, v8
 ; NOREMAT-NEXT:    slli s2, s2, 11
-; NOREMAT-NEXT:    add a2, a0, s2
-; NOREMAT-NEXT:    vle32.v v12, (a2)
-; NOREMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
-; NOREMAT-NEXT:    vle32.v v8, (a2)
+; NOREMAT-NEXT:    add a3, a0, s2
+; NOREMAT-NEXT:    vle32.v v2, (a3)
+; NOREMAT-NEXT:    vle32.v v12, (a3)
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v22, v10
 ; NOREMAT-NEXT:    lui a3, 7
 ; NOREMAT-NEXT:    addiw s0, a3, -1536
-; NOREMAT-NEXT:    add a2, a0, s0
-; NOREMAT-NEXT:    vle32.v v14, (a2)
-; NOREMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
-; NOREMAT-NEXT:    vle32.v v10, (a2)
+; NOREMAT-NEXT:    add a4, a0, s0
+; NOREMAT-NEXT:    vle32.v v4, (a4)
+; NOREMAT-NEXT:    vle32.v v22, (a4)
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v14, v0
 ; NOREMAT-NEXT:    slli t6, t5, 10
-; NOREMAT-NEXT:    add a2, a0, t6
-; NOREMAT-NEXT:    vle32.v v12, (a2)
-; NOREMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
-; NOREMAT-NEXT:    vle32.v v8, (a2)
+; NOREMAT-NEXT:    add a4, a0, t6
+; NOREMAT-NEXT:    vle32.v v6, (a4)
+; NOREMAT-NEXT:    vle32.v v14, (a4)
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v16, v24
 ; NOREMAT-NEXT:    addiw t5, a3, -512
-; NOREMAT-NEXT:    add a2, a0, t5
-; NOREMAT-NEXT:    vle32.v v14, (a2)
-; NOREMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
-; NOREMAT-NEXT:    vle32.v v10, (a2)
-; NOREMAT-NEXT:    add a2, a0, a3
-; NOREMAT-NEXT:    vle32.v v12, (a2)
-; NOREMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
-; NOREMAT-NEXT:    vle32.v v8, (a2)
+; NOREMAT-NEXT:    add a4, a0, t5
+; NOREMAT-NEXT:    vle32.v v0, (a4)
+; NOREMAT-NEXT:    vle32.v v16, (a4)
+; NOREMAT-NEXT:    csrr a4, vlenb
+; NOREMAT-NEXT:    slli a4, a4, 2
+; NOREMAT-NEXT:    add a4, sp, a4
+; NOREMAT-NEXT:    addi a4, a4, 288
+; NOREMAT-NEXT:    vl2r.v v8, (a4) # Unknown-size Folded Reload
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v8, v26
+; NOREMAT-NEXT:    add a4, a0, a3
+; NOREMAT-NEXT:    vle32.v v26, (a4)
+; NOREMAT-NEXT:    vle32.v v8, (a4)
+; NOREMAT-NEXT:    csrr a4, vlenb
+; NOREMAT-NEXT:    slli a4, a4, 1
+; NOREMAT-NEXT:    add a4, sp, a4
+; NOREMAT-NEXT:    addi a4, a4, 288
+; NOREMAT-NEXT:    vl2r.v v10, (a4) # Unknown-size Folded Reload
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v10, v28
 ; NOREMAT-NEXT:    addiw t4, a3, 512
-; NOREMAT-NEXT:    add a2, a0, t4
-; NOREMAT-NEXT:    vle32.v v14, (a2)
-; NOREMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
-; NOREMAT-NEXT:    vle32.v v10, (a2)
+; NOREMAT-NEXT:    add a4, a0, t4
+; NOREMAT-NEXT:    vle32.v v10, (a4)
+; NOREMAT-NEXT:    vle32.v v24, (a4)
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v18, v30
 ; NOREMAT-NEXT:    slli t3, t2, 10
-; NOREMAT-NEXT:    add a2, a0, t3
-; NOREMAT-NEXT:    vle32.v v12, (a2)
-; NOREMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
-; NOREMAT-NEXT:    vle32.v v8, (a2)
+; NOREMAT-NEXT:    add a4, a0, t3
+; NOREMAT-NEXT:    vle32.v v18, (a4)
+; NOREMAT-NEXT:    vle32.v v28, (a4)
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v20, v2
 ; NOREMAT-NEXT:    addiw t2, a3, 1536
-; NOREMAT-NEXT:    add a2, a0, t2
-; NOREMAT-NEXT:    vle32.v v14, (a2)
-; NOREMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
-; NOREMAT-NEXT:    vle32.v v10, (a2)
-; NOREMAT-NEXT:    slli t0, t0, 11
-; NOREMAT-NEXT:    add a2, a0, t0
+; NOREMAT-NEXT:    add a4, a0, t2
+; NOREMAT-NEXT:    vle32.v v20, (a4)
+; NOREMAT-NEXT:    vle32.v v30, (a4)
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v12, v4
+; NOREMAT-NEXT:    slli t1, a2, 11
+; NOREMAT-NEXT:    add a2, a0, t1
 ; NOREMAT-NEXT:    vle32.v v12, (a2)
-; NOREMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
-; NOREMAT-NEXT:    vle32.v v8, (a2)
+; NOREMAT-NEXT:    vle32.v v2, (a2)
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v22, v6
 ; NOREMAT-NEXT:    lui a2, 8
 ; NOREMAT-NEXT:    addiw a7, a2, -1536
 ; NOREMAT-NEXT:    add a4, a0, a7
-; NOREMAT-NEXT:    vle32.v v14, (a4)
-; NOREMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
-; NOREMAT-NEXT:    vle32.v v10, (a4)
+; NOREMAT-NEXT:    vle32.v v22, (a4)
+; NOREMAT-NEXT:    vle32.v v4, (a4)
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v14, v0
 ; NOREMAT-NEXT:    slli a6, a5, 10
 ; NOREMAT-NEXT:    add a4, a0, a6
-; NOREMAT-NEXT:    vle32.v v12, (a4)
-; NOREMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
-; NOREMAT-NEXT:    vle32.v v8, (a4)
+; NOREMAT-NEXT:    vle32.v v14, (a4)
+; NOREMAT-NEXT:    vle32.v v0, (a4)
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v16, v26
 ; NOREMAT-NEXT:    addiw a5, a2, -512
 ; NOREMAT-NEXT:    add a4, a0, a5
-; NOREMAT-NEXT:    vle32.v v14, (a4)
-; NOREMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
-; NOREMAT-NEXT:    vle32.v v10, (a4)
+; NOREMAT-NEXT:    vle32.v v16, (a4)
+; NOREMAT-NEXT:    vle32.v v26, (a4)
 ; NOREMAT-NEXT:    add a0, a0, a2
-; NOREMAT-NEXT:    vle32.v v12, (a0)
-; NOREMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
-; NOREMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
+; NOREMAT-NEXT:    vle32.v v6, (a0)
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v8, v10
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v24, v18
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v28, v20
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v30, v12
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v2, v22
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v4, v14
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v0, v16
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v26, v6
 ; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; NOREMAT-NEXT:    addi a0, a1, 1024
 ; NOREMAT-NEXT:    vse32.v v8, (a0)
 ; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; NOREMAT-NEXT:    addi a0, a1, 1536
-; NOREMAT-NEXT:    vse32.v v10, (a0)
+; NOREMAT-NEXT:    vse32.v v8, (a0)
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
+; NOREMAT-NEXT:    ld a0, 272(sp) # 8-byte Folded Reload
+; NOREMAT-NEXT:    add a0, a1, a0
+; NOREMAT-NEXT:    vse32.v v8, (a0)
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
+; NOREMAT-NEXT:    ld a0, 264(sp) # 8-byte Folded Reload
+; NOREMAT-NEXT:    add a0, a1, a0
+; NOREMAT-NEXT:    vse32.v v8, (a0)
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; NOREMAT-NEXT:    ld a0, 256(sp) # 8-byte Folded Reload
 ; NOREMAT-NEXT:    add a0, a1, a0
-; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; NOREMAT-NEXT:    vse32.v v8, (a0)
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; NOREMAT-NEXT:    ld a0, 248(sp) # 8-byte Folded Reload
 ; NOREMAT-NEXT:    add a0, a1, a0
+; NOREMAT-NEXT:    vse32.v v8, (a0)
 ; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; NOREMAT-NEXT:    vse32.v v10, (a0)
-; NOREMAT-NEXT:    ld a0, 240(sp) # 8-byte Folded Reload
+; NOREMAT-NEXT:    lui a0, 1
 ; NOREMAT-NEXT:    add a0, a1, a0
-; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; NOREMAT-NEXT:    vse32.v v8, (a0)
-; NOREMAT-NEXT:    ld a0, 232(sp) # 8-byte Folded Reload
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
+; NOREMAT-NEXT:    ld a0, 240(sp) # 8-byte Folded Reload
 ; NOREMAT-NEXT:    add a0, a1, a0
+; NOREMAT-NEXT:    vse32.v v8, (a0)
 ; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; NOREMAT-NEXT:    vse32.v v10, (a0)
-; NOREMAT-NEXT:    lui a0, 1
+; NOREMAT-NEXT:    ld a0, 232(sp) # 8-byte Folded Reload
 ; NOREMAT-NEXT:    add a0, a1, a0
-; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; NOREMAT-NEXT:    vse32.v v8, (a0)
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; NOREMAT-NEXT:    ld a0, 224(sp) # 8-byte Folded Reload
 ; NOREMAT-NEXT:    add a0, a1, a0
+; NOREMAT-NEXT:    vse32.v v8, (a0)
 ; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; NOREMAT-NEXT:    vse32.v v10, (a0)
 ; NOREMAT-NEXT:    ld a0, 216(sp) # 8-byte Folded Reload
 ; NOREMAT-NEXT:    add a0, a1, a0
-; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; NOREMAT-NEXT:    vse32.v v8, (a0)
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; NOREMAT-NEXT:    ld a0, 208(sp) # 8-byte Folded Reload
 ; NOREMAT-NEXT:    add a0, a1, a0
+; NOREMAT-NEXT:    vse32.v v8, (a0)
 ; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; NOREMAT-NEXT:    vse32.v v10, (a0)
 ; NOREMAT-NEXT:    ld a0, 200(sp) # 8-byte Folded Reload
 ; NOREMAT-NEXT:    add a0, a1, a0
-; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; NOREMAT-NEXT:    vse32.v v8, (a0)
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; NOREMAT-NEXT:    ld a0, 192(sp) # 8-byte Folded Reload
 ; NOREMAT-NEXT:    add a0, a1, a0
+; NOREMAT-NEXT:    vse32.v v8, (a0)
 ; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; NOREMAT-NEXT:    vse32.v v10, (a0)
-; NOREMAT-NEXT:    ld a0, 184(sp) # 8-byte Folded Reload
+; NOREMAT-NEXT:    lui a0, 2
 ; NOREMAT-NEXT:    add a0, a1, a0
-; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; NOREMAT-NEXT:    vse32.v v8, (a0)
-; NOREMAT-NEXT:    ld a0, 176(sp) # 8-byte Folded Reload
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
+; NOREMAT-NEXT:    ld a0, 184(sp) # 8-byte Folded Reload
 ; NOREMAT-NEXT:    add a0, a1, a0
+; NOREMAT-NEXT:    vse32.v v8, (a0)
 ; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; NOREMAT-NEXT:    vse32.v v10, (a0)
-; NOREMAT-NEXT:    lui a0, 2
+; NOREMAT-NEXT:    ld a0, 176(sp) # 8-byte Folded Reload
 ; NOREMAT-NEXT:    add a0, a1, a0
-; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; NOREMAT-NEXT:    vse32.v v8, (a0)
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; NOREMAT-NEXT:    ld a0, 168(sp) # 8-byte Folded Reload
 ; NOREMAT-NEXT:    add a0, a1, a0
+; NOREMAT-NEXT:    vse32.v v8, (a0)
 ; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; NOREMAT-NEXT:    vse32.v v10, (a0)
 ; NOREMAT-NEXT:    ld a0, 160(sp) # 8-byte Folded Reload
 ; NOREMAT-NEXT:    add a0, a1, a0
-; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; NOREMAT-NEXT:    vse32.v v8, (a0)
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; NOREMAT-NEXT:    ld a0, 152(sp) # 8-byte Folded Reload
 ; NOREMAT-NEXT:    add a0, a1, a0
+; NOREMAT-NEXT:    vse32.v v8, (a0)
 ; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; NOREMAT-NEXT:    vse32.v v10, (a0)
 ; NOREMAT-NEXT:    ld a0, 144(sp) # 8-byte Folded Reload
 ; NOREMAT-NEXT:    add a0, a1, a0
-; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; NOREMAT-NEXT:    vse32.v v8, (a0)
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; NOREMAT-NEXT:    ld a0, 136(sp) # 8-byte Folded Reload
 ; NOREMAT-NEXT:    add a0, a1, a0
+; NOREMAT-NEXT:    vse32.v v8, (a0)
 ; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; NOREMAT-NEXT:    vse32.v v10, (a0)
-; NOREMAT-NEXT:    ld a0, 128(sp) # 8-byte Folded Reload
+; NOREMAT-NEXT:    lui a0, 3
 ; NOREMAT-NEXT:    add a0, a1, a0
-; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; NOREMAT-NEXT:    vse32.v v8, (a0)
-; NOREMAT-NEXT:    ld a0, 120(sp) # 8-byte Folded Reload
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
+; NOREMAT-NEXT:    ld a0, 128(sp) # 8-byte Folded Reload
 ; NOREMAT-NEXT:    add a0, a1, a0
+; NOREMAT-NEXT:    vse32.v v8, (a0)
 ; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; NOREMAT-NEXT:    vse32.v v10, (a0)
-; NOREMAT-NEXT:    lui a0, 3
+; NOREMAT-NEXT:    ld a0, 120(sp) # 8-byte Folded Reload
 ; NOREMAT-NEXT:    add a0, a1, a0
-; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; NOREMAT-NEXT:    vse32.v v8, (a0)
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; NOREMAT-NEXT:    ld a0, 112(sp) # 8-byte Folded Reload
 ; NOREMAT-NEXT:    add a0, a1, a0
+; NOREMAT-NEXT:    vse32.v v8, (a0)
 ; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; NOREMAT-NEXT:    vse32.v v10, (a0)
 ; NOREMAT-NEXT:    ld a0, 104(sp) # 8-byte Folded Reload
 ; NOREMAT-NEXT:    add a0, a1, a0
-; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; NOREMAT-NEXT:    vse32.v v8, (a0)
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; NOREMAT-NEXT:    ld a0, 96(sp) # 8-byte Folded Reload
 ; NOREMAT-NEXT:    add a0, a1, a0
+; NOREMAT-NEXT:    vse32.v v8, (a0)
 ; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; NOREMAT-NEXT:    vse32.v v10, (a0)
 ; NOREMAT-NEXT:    ld a0, 88(sp) # 8-byte Folded Reload
 ; NOREMAT-NEXT:    add a0, a1, a0
-; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; NOREMAT-NEXT:    vse32.v v8, (a0)
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; NOREMAT-NEXT:    ld a0, 80(sp) # 8-byte Folded Reload
 ; NOREMAT-NEXT:    add a0, a1, a0
+; NOREMAT-NEXT:    vse32.v v8, (a0)
 ; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; NOREMAT-NEXT:    vse32.v v10, (a0)
-; NOREMAT-NEXT:    ld a0, 72(sp) # 8-byte Folded Reload
+; NOREMAT-NEXT:    lui a0, 4
 ; NOREMAT-NEXT:    add a0, a1, a0
-; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; NOREMAT-NEXT:    vse32.v v8, (a0)
-; NOREMAT-NEXT:    ld a0, 64(sp) # 8-byte Folded Reload
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
+; NOREMAT-NEXT:    ld a0, 72(sp) # 8-byte Folded Reload
 ; NOREMAT-NEXT:    add a0, a1, a0
+; NOREMAT-NEXT:    vse32.v v8, (a0)
 ; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; NOREMAT-NEXT:    vse32.v v10, (a0)
-; NOREMAT-NEXT:    lui a0, 4
+; NOREMAT-NEXT:    ld a0, 64(sp) # 8-byte Folded Reload
 ; NOREMAT-NEXT:    add a0, a1, a0
-; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; NOREMAT-NEXT:    vse32.v v8, (a0)
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; NOREMAT-NEXT:    ld a0, 56(sp) # 8-byte Folded Reload
 ; NOREMAT-NEXT:    add a0, a1, a0
+; NOREMAT-NEXT:    vse32.v v8, (a0)
 ; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; NOREMAT-NEXT:    vse32.v v10, (a0)
 ; NOREMAT-NEXT:    ld a0, 48(sp) # 8-byte Folded Reload
 ; NOREMAT-NEXT:    add a0, a1, a0
-; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; NOREMAT-NEXT:    vse32.v v8, (a0)
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; NOREMAT-NEXT:    ld a0, 40(sp) # 8-byte Folded Reload
 ; NOREMAT-NEXT:    add a0, a1, a0
+; NOREMAT-NEXT:    vse32.v v8, (a0)
 ; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; NOREMAT-NEXT:    vse32.v v10, (a0)
 ; NOREMAT-NEXT:    ld a0, 32(sp) # 8-byte Folded Reload
 ; NOREMAT-NEXT:    add a0, a1, a0
-; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; NOREMAT-NEXT:    vse32.v v8, (a0)
-; NOREMAT-NEXT:    ld a0, 24(sp) # 8-byte Folded Reload
-; NOREMAT-NEXT:    add a0, a1, a0
 ; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; NOREMAT-NEXT:    vse32.v v10, (a0)
-; NOREMAT-NEXT:    ld a0, 16(sp) # 8-byte Folded Reload
+; NOREMAT-NEXT:    ld a0, 24(sp) # 8-byte Folded Reload
 ; NOREMAT-NEXT:    add a0, a1, a0
-; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; NOREMAT-NEXT:    vse32.v v8, (a0)
-; NOREMAT-NEXT:    ld a0, 8(sp) # 8-byte Folded Reload
-; NOREMAT-NEXT:    add a0, a1, a0
 ; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; NOREMAT-NEXT:    vse32.v v10, (a0)
 ; NOREMAT-NEXT:    add s1, a1, s1
-; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; NOREMAT-NEXT:    vse32.v v8, (s1)
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; NOREMAT-NEXT:    add ra, a1, ra
+; NOREMAT-NEXT:    vse32.v v8, (ra)
 ; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; NOREMAT-NEXT:    vse32.v v10, (ra)
 ; NOREMAT-NEXT:    add s11, a1, s11
-; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; NOREMAT-NEXT:    vse32.v v8, (s11)
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; NOREMAT-NEXT:    add s10, a1, s10
+; NOREMAT-NEXT:    vse32.v v8, (s10)
 ; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; NOREMAT-NEXT:    vse32.v v10, (s10)
 ; NOREMAT-NEXT:    add s9, a1, s9
-; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; NOREMAT-NEXT:    vse32.v v8, (s9)
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; NOREMAT-NEXT:    add s8, a1, s8
+; NOREMAT-NEXT:    vse32.v v8, (s8)
 ; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; NOREMAT-NEXT:    vse32.v v10, (s8)
 ; NOREMAT-NEXT:    add s7, a1, s7
-; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; NOREMAT-NEXT:    vse32.v v8, (s7)
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; NOREMAT-NEXT:    add s6, a1, s6
+; NOREMAT-NEXT:    vse32.v v8, (s6)
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
+; NOREMAT-NEXT:    add t0, a1, t0
+; NOREMAT-NEXT:    vse32.v v8, (t0)
 ; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; NOREMAT-NEXT:    vse32.v v10, (s6)
-; NOREMAT-NEXT:    add t1, a1, t1
-; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
-; NOREMAT-NEXT:    vse32.v v8, (t1)
 ; NOREMAT-NEXT:    add s5, a1, s5
+; NOREMAT-NEXT:    vse32.v v8, (s5)
 ; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; NOREMAT-NEXT:    vse32.v v10, (s5)
 ; NOREMAT-NEXT:    add s4, a1, s4
-; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; NOREMAT-NEXT:    vse32.v v8, (s4)
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; NOREMAT-NEXT:    add s3, a1, s3
+; NOREMAT-NEXT:    vse32.v v8, (s3)
 ; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; NOREMAT-NEXT:    vse32.v v10, (s3)
 ; NOREMAT-NEXT:    add s2, a1, s2
-; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; NOREMAT-NEXT:    vse32.v v8, (s2)
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; NOREMAT-NEXT:    add s0, a1, s0
+; NOREMAT-NEXT:    vse32.v v8, (s0)
 ; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; NOREMAT-NEXT:    vse32.v v10, (s0)
 ; NOREMAT-NEXT:    add t6, a1, t6
-; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; NOREMAT-NEXT:    vse32.v v8, (t6)
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; NOREMAT-NEXT:    add t5, a1, t5
+; NOREMAT-NEXT:    vse32.v v8, (t5)
 ; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; NOREMAT-NEXT:    vse32.v v10, (t5)
 ; NOREMAT-NEXT:    add a3, a1, a3
-; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; NOREMAT-NEXT:    vse32.v v8, (a3)
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; NOREMAT-NEXT:    add t4, a1, t4
+; NOREMAT-NEXT:    vse32.v v8, (t4)
 ; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; NOREMAT-NEXT:    vse32.v v10, (t4)
 ; NOREMAT-NEXT:    add t3, a1, t3
-; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; NOREMAT-NEXT:    vse32.v v8, (t3)
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; NOREMAT-NEXT:    add t2, a1, t2
+; NOREMAT-NEXT:    vse32.v v8, (t2)
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
+; NOREMAT-NEXT:    add t1, a1, t1
+; NOREMAT-NEXT:    vse32.v v8, (t1)
 ; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; NOREMAT-NEXT:    vse32.v v10, (t2)
-; NOREMAT-NEXT:    add t0, a1, t0
-; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
-; NOREMAT-NEXT:    vse32.v v8, (t0)
 ; NOREMAT-NEXT:    add a7, a1, a7
+; NOREMAT-NEXT:    vse32.v v8, (a7)
 ; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; NOREMAT-NEXT:    vse32.v v10, (a7)
 ; NOREMAT-NEXT:    add a6, a1, a6
-; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; NOREMAT-NEXT:    vse32.v v8, (a6)
-; NOREMAT-NEXT:    add a5, a1, a5
 ; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; NOREMAT-NEXT:    vse32.v v10, (a5)
-; NOREMAT-NEXT:    add a0, a1, a2
+; NOREMAT-NEXT:    add a5, a1, a5
 ; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
-; NOREMAT-NEXT:    vse32.v v8, (a0)
-; NOREMAT-NEXT:    addiw a0, a2, 512
-; NOREMAT-NEXT:    add a0, a1, a0
+; NOREMAT-NEXT:    vse32.v v8, (a5)
+; NOREMAT-NEXT:    add a0, a1, a2
 ; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; NOREMAT-NEXT:    vse32.v v10, (a0)
-; NOREMAT-NEXT:    addiw a0, a2, 1024
+; NOREMAT-NEXT:    addiw a0, a2, 512
 ; NOREMAT-NEXT:    add a0, a1, a0
 ; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; NOREMAT-NEXT:    vse32.v v8, (a0)
-; NOREMAT-NEXT:    addiw a0, a2, 1536
+; NOREMAT-NEXT:    addiw a0, a2, 1024
 ; NOREMAT-NEXT:    add a0, a1, a0
-; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; NOREMAT-NEXT:    vse32.v v10, (a0)
-; NOREMAT-NEXT:    li a0, 17
-; NOREMAT-NEXT:    slli a0, a0, 11
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
+; NOREMAT-NEXT:    addiw a0, a2, 1536
 ; NOREMAT-NEXT:    add a0, a1, a0
 ; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; NOREMAT-NEXT:    vse32.v v8, (a0)
+; NOREMAT-NEXT:    li a0, 17
+; NOREMAT-NEXT:    slli a0, a0, 11
+; NOREMAT-NEXT:    add a0, a1, a0
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
+; NOREMAT-NEXT:    vse32.v v10, (a0)
 ; NOREMAT-NEXT:    lui a0, 9
 ; NOREMAT-NEXT:    addiw a2, a0, -1536
 ; NOREMAT-NEXT:    add a2, a1, a2
-; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; NOREMAT-NEXT:    vse32.v v10, (a2)
-; NOREMAT-NEXT:    addiw a2, a0, -1024
-; NOREMAT-NEXT:    add a2, a1, a2
 ; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; NOREMAT-NEXT:    vse32.v v8, (a2)
-; NOREMAT-NEXT:    addiw a2, a0, -512
+; NOREMAT-NEXT:    addiw a2, a0, -1024
 ; NOREMAT-NEXT:    add a2, a1, a2
 ; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; NOREMAT-NEXT:    vse32.v v10, (a2)
-; NOREMAT-NEXT:    add a2, a1, a0
+; NOREMAT-NEXT:    addiw a2, a0, -512
+; NOREMAT-NEXT:    add a2, a1, a2
 ; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; NOREMAT-NEXT:    vse32.v v8, (a2)
-; NOREMAT-NEXT:    addiw a2, a0, 512
-; NOREMAT-NEXT:    add a2, a1, a2
+; NOREMAT-NEXT:    add a2, a1, a0
 ; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; NOREMAT-NEXT:    vse32.v v10, (a2)
-; NOREMAT-NEXT:    addiw a2, a0, 1024
+; NOREMAT-NEXT:    addiw a2, a0, 512
 ; NOREMAT-NEXT:    add a2, a1, a2
 ; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; NOREMAT-NEXT:    vse32.v v8, (a2)
+; NOREMAT-NEXT:    addiw a2, a0, 1024
+; NOREMAT-NEXT:    add a2, a1, a2
+; NOREMAT-NEXT:    vse32.v v10, (a2)
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; NOREMAT-NEXT:    addiw a0, a0, 1536
 ; NOREMAT-NEXT:    add a0, a1, a0
-; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; NOREMAT-NEXT:    vse32.v v10, (a0)
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
+; NOREMAT-NEXT:    vse32.v v8, (a0)
 ; NOREMAT-NEXT:    li a0, 19
 ; NOREMAT-NEXT:    slli a0, a0, 11
 ; NOREMAT-NEXT:    add a0, a1, a0
-; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
-; NOREMAT-NEXT:    vse32.v v8, (a0)
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
+; NOREMAT-NEXT:    vse32.v v10, (a0)
 ; NOREMAT-NEXT:    lui a0, 10
 ; NOREMAT-NEXT:    addiw a2, a0, -1536
 ; NOREMAT-NEXT:    add a2, a1, a2
-; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; NOREMAT-NEXT:    vse32.v v10, (a2)
-; NOREMAT-NEXT:    addiw a2, a0, -1024
-; NOREMAT-NEXT:    add a2, a1, a2
 ; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; NOREMAT-NEXT:    vse32.v v8, (a2)
-; NOREMAT-NEXT:    addiw a2, a0, -512
+; NOREMAT-NEXT:    addiw a2, a0, -1024
 ; NOREMAT-NEXT:    add a2, a1, a2
 ; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; NOREMAT-NEXT:    vse32.v v10, (a2)
-; NOREMAT-NEXT:    add a2, a1, a0
+; NOREMAT-NEXT:    addiw a2, a0, -512
+; NOREMAT-NEXT:    add a2, a1, a2
 ; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; NOREMAT-NEXT:    vse32.v v8, (a2)
+; NOREMAT-NEXT:    add a2, a1, a0
+; NOREMAT-NEXT:    vse32.v v10, (a2)
 ; NOREMAT-NEXT:    addiw a0, a0, 512
 ; NOREMAT-NEXT:    add a0, a1, a0
 ; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; NOREMAT-NEXT:    vse32.v v10, (a0)
+; NOREMAT-NEXT:    vse32.v v8, (a0)
 ; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; NOREMAT-NEXT:    ld ra, 360(sp) # 8-byte Folded Reload
-; NOREMAT-NEXT:    ld s0, 352(sp) # 8-byte Folded Reload
-; NOREMAT-NEXT:    ld s1, 344(sp) # 8-byte Folded Reload
-; NOREMAT-NEXT:    ld s2, 336(sp) # 8-byte Folded Reload
-; NOREMAT-NEXT:    ld s3, 328(sp) # 8-byte Folded Reload
-; NOREMAT-NEXT:    ld s4, 320(sp) # 8-byte Folded Reload
-; NOREMAT-NEXT:    ld s5, 312(sp) # 8-byte Folded Reload
-; NOREMAT-NEXT:    ld s6, 304(sp) # 8-byte Folded Reload
-; NOREMAT-NEXT:    ld s7, 296(sp) # 8-byte Folded Reload
-; NOREMAT-NEXT:    ld s8, 288(sp) # 8-byte Folded Reload
-; NOREMAT-NEXT:    ld s9, 280(sp) # 8-byte Folded Reload
-; NOREMAT-NEXT:    ld s10, 272(sp) # 8-byte Folded Reload
-; NOREMAT-NEXT:    ld s11, 264(sp) # 8-byte Folded Reload
-; NOREMAT-NEXT:    addi sp, sp, 368
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
+; NOREMAT-NEXT:    csrr a0, vlenb
+; NOREMAT-NEXT:    li a1, 6
+; NOREMAT-NEXT:    mul a0, a0, a1
+; NOREMAT-NEXT:    add sp, sp, a0
+; NOREMAT-NEXT:    ld ra, 392(sp) # 8-byte Folded Reload
+; NOREMAT-NEXT:    ld s0, 384(sp) # 8-byte Folded Reload
+; NOREMAT-NEXT:    ld s1, 376(sp) # 8-byte Folded Reload
+; NOREMAT-NEXT:    ld s2, 368(sp) # 8-byte Folded Reload
+; NOREMAT-NEXT:    ld s3, 360(sp) # 8-byte Folded Reload
+; NOREMAT-NEXT:    ld s4, 352(sp) # 8-byte Folded Reload
+; NOREMAT-NEXT:    ld s5, 344(sp) # 8-byte Folded Reload
+; NOREMAT-NEXT:    ld s6, 336(sp) # 8-byte Folded Reload
+; NOREMAT-NEXT:    ld s7, 328(sp) # 8-byte Folded Reload
+; NOREMAT-NEXT:    ld s8, 320(sp) # 8-byte Folded Reload
+; NOREMAT-NEXT:    ld s9, 312(sp) # 8-byte Folded Reload
+; NOREMAT-NEXT:    ld s10, 304(sp) # 8-byte Folded Reload
+; NOREMAT-NEXT:    ld s11, 296(sp) # 8-byte Folded Reload
+; NOREMAT-NEXT:    addi sp, sp, 400
 ; NOREMAT-NEXT:    ret
 ;
 ; REMAT-LABEL: test:
@@ -864,512 +927,512 @@ define void @test(ptr %0, ptr %1, i64 %2) {
 ; REMAT-NEXT:    add a2, a0, a2
 ; REMAT-NEXT:    vle32.v v14, (a2)
 ; REMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
-; REMAT-NEXT:    vle32.v v10, (a2)
+; REMAT-NEXT:    vle32.v v12, (a2)
 ; REMAT-NEXT:    li a2, 11
 ; REMAT-NEXT:    slli a2, a2, 10
 ; REMAT-NEXT:    add a2, a0, a2
-; REMAT-NEXT:    vle32.v v12, (a2)
+; REMAT-NEXT:    vle32.v v16, (a2)
 ; REMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
-; REMAT-NEXT:    vle32.v v8, (a2)
+; REMAT-NEXT:    vle32.v v10, (a2)
 ; REMAT-NEXT:    li a2, 23
 ; REMAT-NEXT:    slli a2, a2, 9
 ; REMAT-NEXT:    add a2, a0, a2
-; REMAT-NEXT:    vle32.v v14, (a2)
-; REMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
-; REMAT-NEXT:    vle32.v v10, (a2)
+; REMAT-NEXT:    vle32.v v26, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v12, v16
+; REMAT-NEXT:    vle32.v v12, (a2)
 ; REMAT-NEXT:    lui a2, 3
 ; REMAT-NEXT:    add a2, a0, a2
-; REMAT-NEXT:    vle32.v v12, (a2)
-; REMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
-; REMAT-NEXT:    vle32.v v8, (a2)
+; REMAT-NEXT:    vle32.v v28, (a2)
+; REMAT-NEXT:    vle32.v v14, (a2)
 ; REMAT-NEXT:    li a2, 25
 ; REMAT-NEXT:    slli a2, a2, 9
 ; REMAT-NEXT:    add a2, a0, a2
-; REMAT-NEXT:    vle32.v v14, (a2)
-; REMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
-; REMAT-NEXT:    vle32.v v10, (a2)
+; REMAT-NEXT:    vle32.v v30, (a2)
+; REMAT-NEXT:    vle32.v v16, (a2)
 ; REMAT-NEXT:    li a2, 13
 ; REMAT-NEXT:    slli a2, a2, 10
 ; REMAT-NEXT:    add a2, a0, a2
-; REMAT-NEXT:    vle32.v v12, (a2)
-; REMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
-; REMAT-NEXT:    vle32.v v8, (a2)
+; REMAT-NEXT:    vle32.v v0, (a2)
+; REMAT-NEXT:    vle32.v v18, (a2)
 ; REMAT-NEXT:    li a2, 27
 ; REMAT-NEXT:    slli a2, a2, 9
 ; REMAT-NEXT:    add a2, a0, a2
-; REMAT-NEXT:    vle32.v v14, (a2)
-; REMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
-; REMAT-NEXT:    vle32.v v10, (a2)
+; REMAT-NEXT:    vle32.v v2, (a2)
+; REMAT-NEXT:    vle32.v v20, (a2)
 ; REMAT-NEXT:    li a2, 7
 ; REMAT-NEXT:    slli a2, a2, 11
 ; REMAT-NEXT:    add a2, a0, a2
-; REMAT-NEXT:    vle32.v v12, (a2)
-; REMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
-; REMAT-NEXT:    vle32.v v8, (a2)
+; REMAT-NEXT:    vle32.v v4, (a2)
+; REMAT-NEXT:    vle32.v v22, (a2)
 ; REMAT-NEXT:    li a2, 29
 ; REMAT-NEXT:    slli a2, a2, 9
 ; REMAT-NEXT:    add a2, a0, a2
-; REMAT-NEXT:    vle32.v v14, (a2)
-; REMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
-; REMAT-NEXT:    vle32.v v10, (a2)
+; REMAT-NEXT:    vle32.v v24, (a2)
+; REMAT-NEXT:    vle32.v v8, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v10, v26
 ; REMAT-NEXT:    li a2, 15
 ; REMAT-NEXT:    slli a2, a2, 10
 ; REMAT-NEXT:    add a2, a0, a2
-; REMAT-NEXT:    vle32.v v12, (a2)
-; REMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
-; REMAT-NEXT:    vle32.v v8, (a2)
+; REMAT-NEXT:    vle32.v v26, (a2)
+; REMAT-NEXT:    vle32.v v10, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v12, v28
 ; REMAT-NEXT:    li a2, 31
 ; REMAT-NEXT:    slli a2, a2, 9
 ; REMAT-NEXT:    add a2, a0, a2
-; REMAT-NEXT:    vle32.v v14, (a2)
-; REMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
-; REMAT-NEXT:    vle32.v v10, (a2)
+; REMAT-NEXT:    vle32.v v28, (a2)
+; REMAT-NEXT:    vle32.v v12, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v14, v30
 ; REMAT-NEXT:    lui a2, 4
 ; REMAT-NEXT:    add a2, a0, a2
-; REMAT-NEXT:    vle32.v v12, (a2)
-; REMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
-; REMAT-NEXT:    vle32.v v8, (a2)
+; REMAT-NEXT:    vle32.v v30, (a2)
+; REMAT-NEXT:    vle32.v v14, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v16, v0
 ; REMAT-NEXT:    lui a2, 4
 ; REMAT-NEXT:    addiw a2, a2, 512
 ; REMAT-NEXT:    add a2, a0, a2
-; REMAT-NEXT:    vle32.v v14, (a2)
-; REMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
-; REMAT-NEXT:    vle32.v v10, (a2)
+; REMAT-NEXT:    vle32.v v0, (a2)
+; REMAT-NEXT:    vle32.v v16, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v18, v2
 ; REMAT-NEXT:    li a2, 17
 ; REMAT-NEXT:    slli a2, a2, 10
 ; REMAT-NEXT:    add a2, a0, a2
-; REMAT-NEXT:    vle32.v v12, (a2)
-; REMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
-; REMAT-NEXT:    vle32.v v8, (a2)
+; REMAT-NEXT:    vle32.v v2, (a2)
+; REMAT-NEXT:    vle32.v v18, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v20, v4
 ; REMAT-NEXT:    lui a2, 4
 ; REMAT-NEXT:    addiw a2, a2, 1536
 ; REMAT-NEXT:    add a2, a0, a2
-; REMAT-NEXT:    vle32.v v14, (a2)
-; REMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
-; REMAT-NEXT:    vle32.v v10, (a2)
+; REMAT-NEXT:    vle32.v v4, (a2)
+; REMAT-NEXT:    vle32.v v20, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v22, v24
 ; REMAT-NEXT:    li a2, 9
 ; REMAT-NEXT:    slli a2, a2, 11
 ; REMAT-NEXT:    add a2, a0, a2
-; REMAT-NEXT:    vle32.v v12, (a2)
-; REMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
-; REMAT-NEXT:    vle32.v v8, (a2)
+; REMAT-NEXT:    vle32.v v24, (a2)
+; REMAT-NEXT:    vle32.v v22, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v8, v26
 ; REMAT-NEXT:    lui a2, 5
 ; REMAT-NEXT:    addiw a2, a2, -1536
 ; REMAT-NEXT:    add a2, a0, a2
-; REMAT-NEXT:    vle32.v v14, (a2)
-; REMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
-; REMAT-NEXT:    vle32.v v10, (a2)
+; REMAT-NEXT:    vle32.v v26, (a2)
+; REMAT-NEXT:    vle32.v v8, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v10, v28
 ; REMAT-NEXT:    li a2, 19
 ; REMAT-NEXT:    slli a2, a2, 10
 ; REMAT-NEXT:    add a2, a0, a2
-; REMAT-NEXT:    vle32.v v12, (a2)
-; REMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
-; REMAT-NEXT:    vle32.v v8, (a2)
+; REMAT-NEXT:    vle32.v v28, (a2)
+; REMAT-NEXT:    vle32.v v10, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v12, v30
 ; REMAT-NEXT:    lui ra, 5
 ; REMAT-NEXT:    addiw ra, ra, -512
 ; REMAT-NEXT:    add a2, a0, ra
-; REMAT-NEXT:    vle32.v v14, (a2)
-; REMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
-; REMAT-NEXT:    vle32.v v10, (a2)
+; REMAT-NEXT:    vle32.v v30, (a2)
+; REMAT-NEXT:    vle32.v v12, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v14, v0
 ; REMAT-NEXT:    lui s11, 5
 ; REMAT-NEXT:    add a2, a0, s11
-; REMAT-NEXT:    vle32.v v12, (a2)
-; REMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
-; REMAT-NEXT:    vle32.v v8, (a2)
+; REMAT-NEXT:    vle32.v v0, (a2)
+; REMAT-NEXT:    vle32.v v14, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v16, v2
 ; REMAT-NEXT:    lui s10, 5
 ; REMAT-NEXT:    addiw s10, s10, 512
 ; REMAT-NEXT:    add a2, a0, s10
-; REMAT-NEXT:    vle32.v v14, (a2)
-; REMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
-; REMAT-NEXT:    vle32.v v10, (a2)
+; REMAT-NEXT:    vle32.v v2, (a2)
+; REMAT-NEXT:    vle32.v v16, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v18, v4
 ; REMAT-NEXT:    li s9, 21
 ; REMAT-NEXT:    slli s9, s9, 10
 ; REMAT-NEXT:    add a2, a0, s9
-; REMAT-NEXT:    vle32.v v12, (a2)
-; REMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
-; REMAT-NEXT:    vle32.v v8, (a2)
+; REMAT-NEXT:    vle32.v v4, (a2)
+; REMAT-NEXT:    vle32.v v18, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v20, v24
 ; REMAT-NEXT:    lui s8, 5
 ; REMAT-NEXT:    addiw s8, s8, 1536
 ; REMAT-NEXT:    add a2, a0, s8
-; REMAT-NEXT:    vle32.v v14, (a2)
-; REMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
-; REMAT-NEXT:    vle32.v v10, (a2)
+; REMAT-NEXT:    vle32.v v24, (a2)
+; REMAT-NEXT:    vle32.v v20, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v22, v26
 ; REMAT-NEXT:    li s7, 11
 ; REMAT-NEXT:    slli s7, s7, 11
 ; REMAT-NEXT:    add a2, a0, s7
-; REMAT-NEXT:    vle32.v v12, (a2)
-; REMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
-; REMAT-NEXT:    vle32.v v8, (a2)
+; REMAT-NEXT:    vle32.v v26, (a2)
+; REMAT-NEXT:    vle32.v v22, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v8, v28
 ; REMAT-NEXT:    lui s6, 6
 ; REMAT-NEXT:    addiw s6, s6, -1536
 ; REMAT-NEXT:    add a2, a0, s6
-; REMAT-NEXT:    vle32.v v14, (a2)
-; REMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
-; REMAT-NEXT:    vle32.v v10, (a2)
+; REMAT-NEXT:    vle32.v v28, (a2)
+; REMAT-NEXT:    vle32.v v8, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v10, v30
 ; REMAT-NEXT:    li s5, 23
 ; REMAT-NEXT:    slli s5, s5, 10
 ; REMAT-NEXT:    add a2, a0, s5
-; REMAT-NEXT:    vle32.v v12, (a2)
-; REMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
-; REMAT-NEXT:    vle32.v v8, (a2)
+; REMAT-NEXT:    vle32.v v30, (a2)
+; REMAT-NEXT:    vle32.v v10, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v12, v0
 ; REMAT-NEXT:    lui s4, 6
 ; REMAT-NEXT:    addiw s4, s4, -512
 ; REMAT-NEXT:    add a2, a0, s4
-; REMAT-NEXT:    vle32.v v14, (a2)
-; REMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
-; REMAT-NEXT:    vle32.v v10, (a2)
+; REMAT-NEXT:    vle32.v v0, (a2)
+; REMAT-NEXT:    vle32.v v12, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v14, v2
 ; REMAT-NEXT:    lui s3, 6
 ; REMAT-NEXT:    add a2, a0, s3
-; REMAT-NEXT:    vle32.v v12, (a2)
-; REMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
-; REMAT-NEXT:    vle32.v v8, (a2)
+; REMAT-NEXT:    vle32.v v2, (a2)
+; REMAT-NEXT:    vle32.v v14, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v16, v4
 ; REMAT-NEXT:    lui s2, 6
 ; REMAT-NEXT:    addiw s2, s2, 512
 ; REMAT-NEXT:    add a2, a0, s2
-; REMAT-NEXT:    vle32.v v14, (a2)
-; REMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
-; REMAT-NEXT:    vle32.v v10, (a2)
+; REMAT-NEXT:    vle32.v v4, (a2)
+; REMAT-NEXT:    vle32.v v16, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v18, v24
 ; REMAT-NEXT:    li s1, 25
 ; REMAT-NEXT:    slli s1, s1, 10
 ; REMAT-NEXT:    add a2, a0, s1
-; REMAT-NEXT:    vle32.v v12, (a2)
-; REMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
-; REMAT-NEXT:    vle32.v v8, (a2)
+; REMAT-NEXT:    vle32.v v6, (a2)
+; REMAT-NEXT:    vle32.v v18, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v20, v26
 ; REMAT-NEXT:    lui s0, 6
 ; REMAT-NEXT:    addiw s0, s0, 1536
 ; REMAT-NEXT:    add a2, a0, s0
-; REMAT-NEXT:    vle32.v v14, (a2)
-; REMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
-; REMAT-NEXT:    vle32.v v10, (a2)
+; REMAT-NEXT:    vle32.v v26, (a2)
+; REMAT-NEXT:    vle32.v v20, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v22, v28
 ; REMAT-NEXT:    li t6, 13
 ; REMAT-NEXT:    slli t6, t6, 11
 ; REMAT-NEXT:    add a2, a0, t6
-; REMAT-NEXT:    vle32.v v12, (a2)
-; REMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
-; REMAT-NEXT:    vle32.v v8, (a2)
+; REMAT-NEXT:    vle32.v v28, (a2)
+; REMAT-NEXT:    vle32.v v22, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v8, v30
 ; REMAT-NEXT:    lui t5, 7
 ; REMAT-NEXT:    addiw t5, t5, -1536
 ; REMAT-NEXT:    add a2, a0, t5
-; REMAT-NEXT:    vle32.v v14, (a2)
-; REMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
-; REMAT-NEXT:    vle32.v v10, (a2)
+; REMAT-NEXT:    vle32.v v30, (a2)
+; REMAT-NEXT:    vle32.v v24, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v10, v0
 ; REMAT-NEXT:    li t4, 27
 ; REMAT-NEXT:    slli t4, t4, 10
 ; REMAT-NEXT:    add a2, a0, t4
-; REMAT-NEXT:    vle32.v v12, (a2)
-; REMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
-; REMAT-NEXT:    vle32.v v8, (a2)
+; REMAT-NEXT:    vle32.v v0, (a2)
+; REMAT-NEXT:    vle32.v v10, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v12, v2
 ; REMAT-NEXT:    lui t3, 7
 ; REMAT-NEXT:    addiw t3, t3, -512
 ; REMAT-NEXT:    add a2, a0, t3
-; REMAT-NEXT:    vle32.v v14, (a2)
-; REMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
-; REMAT-NEXT:    vle32.v v10, (a2)
+; REMAT-NEXT:    vle32.v v2, (a2)
+; REMAT-NEXT:    vle32.v v12, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v14, v4
 ; REMAT-NEXT:    lui t2, 7
 ; REMAT-NEXT:    add a2, a0, t2
-; REMAT-NEXT:    vle32.v v12, (a2)
-; REMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
+; REMAT-NEXT:    vle32.v v4, (a2)
 ; REMAT-NEXT:    vle32.v v8, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v16, v6
 ; REMAT-NEXT:    lui t1, 7
 ; REMAT-NEXT:    addiw t1, t1, 512
 ; REMAT-NEXT:    add a2, a0, t1
 ; REMAT-NEXT:    vle32.v v14, (a2)
-; REMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
-; REMAT-NEXT:    vle32.v v10, (a2)
+; REMAT-NEXT:    vle32.v v16, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v18, v26
 ; REMAT-NEXT:    li t0, 29
 ; REMAT-NEXT:    slli t0, t0, 10
 ; REMAT-NEXT:    add a2, a0, t0
-; REMAT-NEXT:    vle32.v v12, (a2)
-; REMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
-; REMAT-NEXT:    vle32.v v8, (a2)
+; REMAT-NEXT:    vle32.v v18, (a2)
+; REMAT-NEXT:    vle32.v v26, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v20, v28
 ; REMAT-NEXT:    lui a7, 7
 ; REMAT-NEXT:    addiw a7, a7, 1536
 ; REMAT-NEXT:    add a2, a0, a7
-; REMAT-NEXT:    vle32.v v14, (a2)
-; REMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
-; REMAT-NEXT:    vle32.v v10, (a2)
+; REMAT-NEXT:    vle32.v v20, (a2)
+; REMAT-NEXT:    vle32.v v28, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v22, v30
 ; REMAT-NEXT:    li a6, 15
 ; REMAT-NEXT:    slli a6, a6, 11
 ; REMAT-NEXT:    add a2, a0, a6
-; REMAT-NEXT:    vle32.v v12, (a2)
-; REMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
-; REMAT-NEXT:    vle32.v v8, (a2)
+; REMAT-NEXT:    vle32.v v22, (a2)
+; REMAT-NEXT:    vle32.v v30, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v24, v0
 ; REMAT-NEXT:    lui a5, 8
 ; REMAT-NEXT:    addiw a5, a5, -1536
 ; REMAT-NEXT:    add a2, a0, a5
-; REMAT-NEXT:    vle32.v v14, (a2)
-; REMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
-; REMAT-NEXT:    vle32.v v10, (a2)
+; REMAT-NEXT:    vle32.v v24, (a2)
+; REMAT-NEXT:    vle32.v v0, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v10, v2
 ; REMAT-NEXT:    li a4, 31
 ; REMAT-NEXT:    slli a4, a4, 10
 ; REMAT-NEXT:    add a2, a0, a4
-; REMAT-NEXT:    vle32.v v12, (a2)
-; REMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
-; REMAT-NEXT:    vle32.v v8, (a2)
+; REMAT-NEXT:    vle32.v v10, (a2)
+; REMAT-NEXT:    vle32.v v2, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v12, v4
 ; REMAT-NEXT:    lui a3, 8
 ; REMAT-NEXT:    addiw a3, a3, -512
 ; REMAT-NEXT:    add a2, a0, a3
-; REMAT-NEXT:    vle32.v v14, (a2)
-; REMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
-; REMAT-NEXT:    vle32.v v10, (a2)
+; REMAT-NEXT:    vle32.v v12, (a2)
+; REMAT-NEXT:    vle32.v v4, (a2)
 ; REMAT-NEXT:    lui a2, 8
 ; REMAT-NEXT:    add a0, a0, a2
-; REMAT-NEXT:    vle32.v v12, (a0)
+; REMAT-NEXT:    vle32.v v6, (a0)
 ; REMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
-; REMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
+; REMAT-NEXT:    sf.vc.vv 3, 0, v16, v18
+; REMAT-NEXT:    sf.vc.vv 3, 0, v26, v20
+; REMAT-NEXT:    sf.vc.vv 3, 0, v28, v22
+; REMAT-NEXT:    sf.vc.vv 3, 0, v30, v24
+; REMAT-NEXT:    sf.vc.vv 3, 0, v0, v10
+; REMAT-NEXT:    sf.vc.vv 3, 0, v2, v12
+; REMAT-NEXT:    sf.vc.vv 3, 0, v4, v6
 ; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; REMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; REMAT-NEXT:    addi a0, a1, 1024
 ; REMAT-NEXT:    vse32.v v8, (a0)
 ; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; REMAT-NEXT:    addi a0, a1, 1536
-; REMAT-NEXT:    vse32.v v10, (a0)
+; REMAT-NEXT:    vse32.v v8, (a0)
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; REMAT-NEXT:    li a0, 1
 ; REMAT-NEXT:    slli a0, a0, 11
 ; REMAT-NEXT:    add a0, a1, a0
-; REMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; REMAT-NEXT:    vse32.v v8, (a0)
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; REMAT-NEXT:    li a0, 5
 ; REMAT-NEXT:    slli a0, a0, 9
 ; REMAT-NEXT:    add a0, a1, a0
+; REMAT-NEXT:    vse32.v v8, (a0)
 ; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; REMAT-NEXT:    vse32.v v10, (a0)
 ; REMAT-NEXT:    li a0, 3
 ; REMAT-NEXT:    slli a0, a0, 10
 ; REMAT-NEXT:    add a0, a1, a0
-; REMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; REMAT-NEXT:    vse32.v v8, (a0)
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; REMAT-NEXT:    li a0, 7
 ; REMAT-NEXT:    slli a0, a0, 9
 ; REMAT-NEXT:    add a0, a1, a0
+; REMAT-NEXT:    vse32.v v8, (a0)
 ; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; REMAT-NEXT:    vse32.v v10, (a0)
 ; REMAT-NEXT:    lui a0, 1
 ; REMAT-NEXT:    add a0, a1, a0
-; REMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; REMAT-NEXT:    vse32.v v8, (a0)
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; REMAT-NEXT:    li a0, 9
 ; REMAT-NEXT:    slli a0, a0, 9
 ; REMAT-NEXT:    add a0, a1, a0
+; REMAT-NEXT:    vse32.v v8, (a0)
 ; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; REMAT-NEXT:    vse32.v v10, (a0)
 ; REMAT-NEXT:    li a0, 5
 ; REMAT-NEXT:    slli a0, a0, 10
 ; REMAT-NEXT:    add a0, a1, a0
-; REMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; REMAT-NEXT:    vse32.v v8, (a0)
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; REMAT-NEXT:    li a0, 11
 ; REMAT-NEXT:    slli a0, a0, 9
 ; REMAT-NEXT:    add a0, a1, a0
+; REMAT-NEXT:    vse32.v v8, (a0)
 ; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; REMAT-NEXT:    vse32.v v10, (a0)
 ; REMAT-NEXT:    li a0, 3
 ; REMAT-NEXT:    slli a0, a0, 11
 ; REMAT-NEXT:    add a0, a1, a0
-; REMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; REMAT-NEXT:    vse32.v v8, (a0)
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; REMAT-NEXT:    li a0, 13
 ; REMAT-NEXT:    slli a0, a0, 9
 ; REMAT-NEXT:    add a0, a1, a0
+; REMAT-NEXT:    vse32.v v8, (a0)
 ; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; REMAT-NEXT:    vse32.v v10, (a0)
 ; REMAT-NEXT:    li a0, 7
 ; REMAT-NEXT:    slli a0, a0, 10
 ; REMAT-NEXT:    add a0, a1, a0
-; REMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; REMAT-NEXT:    vse32.v v8, (a0)
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; REMAT-NEXT:    li a0, 15
 ; REMAT-NEXT:    slli a0, a0, 9
 ; REMAT-NEXT:    add a0, a1, a0
+; REMAT-NEXT:    vse32.v v8, (a0)
 ; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; REMAT-NEXT:    vse32.v v10, (a0)
 ; REMAT-NEXT:    lui a0, 2
 ; REMAT-NEXT:    add a0, a1, a0
-; REMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; REMAT-NEXT:    vse32.v v8, (a0)
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; REMAT-NEXT:    li a0, 17
 ; REMAT-NEXT:    slli a0, a0, 9
 ; REMAT-NEXT:    add a0, a1, a0
+; REMAT-NEXT:    vse32.v v8, (a0)
 ; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; REMAT-NEXT:    vse32.v v10, (a0)
 ; REMAT-NEXT:    li a0, 9
 ; REMAT-NEXT:    slli a0, a0, 10
 ; REMAT-NEXT:    add a0, a1, a0
-; REMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; REMAT-NEXT:    vse32.v v8, (a0)
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; REMAT-NEXT:    li a0, 19
 ; REMAT-NEXT:    slli a0, a0, 9
 ; REMAT-NEXT:    add a0, a1, a0
+; REMAT-NEXT:    vse32.v v8, (a0)
 ; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; REMAT-NEXT:    vse32.v v10, (a0)
 ; REMAT-NEXT:    li a0, 5
 ; REMAT-NEXT:    slli a0, a0, 11
 ; REMAT-NEXT:    add a0, a1, a0
-; REMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; REMAT-NEXT:    vse32.v v8, (a0)
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; REMAT-NEXT:    li a0, 21
 ; REMAT-NEXT:    slli a0, a0, 9
 ; REMAT-NEXT:    add a0, a1, a0
+; REMAT-NEXT:    vse32.v v8, (a0)
 ; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; REMAT-NEXT:    vse32.v v10, (a0)
 ; REMAT-NEXT:    li a0, 11
 ; REMAT-NEXT:    slli a0, a0, 10
 ; REMAT-NEXT:    add a0, a1, a0
-; REMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; REMAT-NEXT:    vse32.v v8, (a0)
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; REMAT-NEXT:    li a0, 23
 ; REMAT-NEXT:    slli a0, a0, 9
 ; REMAT-NEXT:    add a0, a1, a0
+; REMAT-NEXT:    vse32.v v8, (a0)
 ; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; REMAT-NEXT:    vse32.v v10, (a0)
 ; REMAT-NEXT:    lui a0, 3
 ; REMAT-NEXT:    add a0, a1, a0
-; REMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; REMAT-NEXT:    vse32.v v8, (a0)
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; REMAT-NEXT:    li a0, 25
 ; REMAT-NEXT:    slli a0, a0, 9
 ; REMAT-NEXT:    add a0, a1, a0
+; REMAT-NEXT:    vse32.v v8, (a0)
 ; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; REMAT-NEXT:    vse32.v v10, (a0)
 ; REMAT-NEXT:    li a0, 13
 ; REMAT-NEXT:    slli a0, a0, 10
 ; REMAT-NEXT:    add a0, a1, a0
-; REMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; REMAT-NEXT:    vse32.v v8, (a0)
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; REMAT-NEXT:    li a0, 27
 ; REMAT-NEXT:    slli a0, a0, 9
 ; REMAT-NEXT:    add a0, a1, a0
+; REMAT-NEXT:    vse32.v v8, (a0)
 ; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; REMAT-NEXT:    vse32.v v10, (a0)
 ; REMAT-NEXT:    li a0, 7
 ; REMAT-NEXT:    slli a0, a0, 11
 ; REMAT-NEXT:    add a0, a1, a0
-; REMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; REMAT-NEXT:    vse32.v v8, (a0)
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; REMAT-NEXT:    li a0, 29
 ; REMAT-NEXT:    slli a0, a0, 9
 ; REMAT-NEXT:    add a0, a1, a0
+; REMAT-NEXT:    vse32.v v8, (a0)
 ; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; REMAT-NEXT:    vse32.v v10, (a0)
 ; REMAT-NEXT:    li a0, 15
 ; REMAT-NEXT:    slli a0, a0, 10
 ; REMAT-NEXT:    add a0, a1, a0
-; REMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; REMAT-NEXT:    vse32.v v8, (a0)
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; REMAT-NEXT:    li a0, 31
 ; REMAT-NEXT:    slli a0, a0, 9
 ; REMAT-NEXT:    add a0, a1, a0
+; REMAT-NEXT:    vse32.v v8, (a0)
 ; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; REMAT-NEXT:    vse32.v v10, (a0)
 ; REMAT-NEXT:    lui a0, 4
 ; REMAT-NEXT:    add a0, a1, a0
-; REMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; REMAT-NEXT:    vse32.v v8, (a0)
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; REMAT-NEXT:    lui a0, 4
 ; REMAT-NEXT:    addiw a0, a0, 512
 ; REMAT-NEXT:    add a0, a1, a0
+; REMAT-NEXT:    vse32.v v8, (a0)
 ; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; REMAT-NEXT:    vse32.v v10, (a0)
 ; REMAT-NEXT:    li a0, 17
 ; REMAT-NEXT:    slli a0, a0, 10
 ; REMAT-NEXT:    add a0, a1, a0
-; REMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; REMAT-NEXT:    vse32.v v8, (a0)
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; REMAT-NEXT:    lui a0, 4
 ; REMAT-NEXT:    addiw a0, a0, 1536
 ; REMAT-NEXT:    add a0, a1, a0
+; REMAT-NEXT:    vse32.v v8, (a0)
 ; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; REMAT-NEXT:    vse32.v v10, (a0)
 ; REMAT-NEXT:    li a0, 9
 ; REMAT-NEXT:    slli a0, a0, 11
 ; REMAT-NEXT:    add a0, a1, a0
-; REMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; REMAT-NEXT:    vse32.v v8, (a0)
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; REMAT-NEXT:    lui a0, 5
 ; REMAT-NEXT:    addiw a0, a0, -1536
 ; REMAT-NEXT:    add a0, a1, a0
+; REMAT-NEXT:    vse32.v v8, (a0)
 ; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; REMAT-NEXT:    vse32.v v10, (a0)
 ; REMAT-NEXT:    li a0, 19
 ; REMAT-NEXT:    slli a0, a0, 10
 ; REMAT-NEXT:    add a0, a1, a0
-; REMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; REMAT-NEXT:    vse32.v v8, (a0)
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; REMAT-NEXT:    add ra, a1, ra
+; REMAT-NEXT:    vse32.v v8, (ra)
 ; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; REMAT-NEXT:    vse32.v v10, (ra)
 ; REMAT-NEXT:    add s11, a1, s11
-; REMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; REMAT-NEXT:    vse32.v v8, (s11)
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; REMAT-NEXT:    add s10, a1, s10
+; REMAT-NEXT:    vse32.v v8, (s10)
 ; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; REMAT-NEXT:    vse32.v v10, (s10)
 ; REMAT-NEXT:    add s9, a1, s9
-; REMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; REMAT-NEXT:    vse32.v v8, (s9)
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; REMAT-NEXT:    add s8, a1, s8
+; REMAT-NEXT:    vse32.v v8, (s8)
 ; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; REMAT-NEXT:    vse32.v v10, (s8)
 ; REMAT-NEXT:    add s7, a1, s7
-; REMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; REMAT-NEXT:    vse32.v v8, (s7)
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; REMAT-NEXT:    add s6, a1, s6
+; REMAT-NEXT:    vse32.v v8, (s6)
 ; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; REMAT-NEXT:    vse32.v v10, (s6)
 ; REMAT-NEXT:    add s5, a1, s5
-; REMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; REMAT-NEXT:    vse32.v v8, (s5)
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; REMAT-NEXT:    add s4, a1, s4
+; REMAT-NEXT:    vse32.v v8, (s4)
 ; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; REMAT-NEXT:    vse32.v v10, (s4)
 ; REMAT-NEXT:    add s3, a1, s3
-; REMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; REMAT-NEXT:    vse32.v v8, (s3)
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; REMAT-NEXT:    add s2, a1, s2
+; REMAT-NEXT:    vse32.v v8, (s2)
 ; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; REMAT-NEXT:    vse32.v v10, (s2)
 ; REMAT-NEXT:    add s1, a1, s1
-; REMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; REMAT-NEXT:    vse32.v v8, (s1)
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; REMAT-NEXT:    add s0, a1, s0
+; REMAT-NEXT:    vse32.v v8, (s0)
 ; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; REMAT-NEXT:    vse32.v v10, (s0)
 ; REMAT-NEXT:    add t6, a1, t6
-; REMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; REMAT-NEXT:    vse32.v v8, (t6)
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; REMAT-NEXT:    add t5, a1, t5
+; REMAT-NEXT:    vse32.v v8, (t5)
 ; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; REMAT-NEXT:    vse32.v v10, (t5)
 ; REMAT-NEXT:    add t4, a1, t4
-; REMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; REMAT-NEXT:    vse32.v v8, (t4)
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; REMAT-NEXT:    add t3, a1, t3
+; REMAT-NEXT:    vse32.v v8, (t3)
 ; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; REMAT-NEXT:    vse32.v v10, (t3)
 ; REMAT-NEXT:    add t2, a1, t2
-; REMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; REMAT-NEXT:    vse32.v v8, (t2)
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; REMAT-NEXT:    add t1, a1, t1
+; REMAT-NEXT:    vse32.v v8, (t1)
 ; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; REMAT-NEXT:    vse32.v v10, (t1)
 ; REMAT-NEXT:    add t0, a1, t0
-; REMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; REMAT-NEXT:    vse32.v v8, (t0)
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; REMAT-NEXT:    add a7, a1, a7
+; REMAT-NEXT:    vse32.v v8, (a7)
 ; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; REMAT-NEXT:    vse32.v v10, (a7)
 ; REMAT-NEXT:    add a6, a1, a6
-; REMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; REMAT-NEXT:    vse32.v v8, (a6)
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; REMAT-NEXT:    add a5, a1, a5
+; REMAT-NEXT:    vse32.v v8, (a5)
 ; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; REMAT-NEXT:    vse32.v v10, (a5)
 ; REMAT-NEXT:    add a4, a1, a4
-; REMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; REMAT-NEXT:    vse32.v v8, (a4)
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; REMAT-NEXT:    add a3, a1, a3
+; REMAT-NEXT:    vse32.v v8, (a3)
 ; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; REMAT-NEXT:    vse32.v v10, (a3)
 ; REMAT-NEXT:    add a2, a1, a2
 ; REMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; REMAT-NEXT:    vse32.v v8, (a2)
@@ -1449,13 +1512,13 @@ define void @test(ptr %0, ptr %1, i64 %2) {
 ; REMAT-NEXT:    vse32.v v10, (a0)
 ; REMAT-NEXT:    lui a0, 10
 ; REMAT-NEXT:    add a0, a1, a0
-; REMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; REMAT-NEXT:    vse32.v v8, (a0)
 ; REMAT-NEXT:    lui a0, 10
 ; REMAT-NEXT:    addiw a0, a0, 512
 ; REMAT-NEXT:    add a0, a1, a0
 ; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; REMAT-NEXT:    vse32.v v10, (a0)
+; REMAT-NEXT:    vse32.v v8, (a0)
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; REMAT-NEXT:    ld ra, 104(sp) # 8-byte Folded Reload
 ; REMAT-NEXT:    ld s0, 96(sp) # 8-byte Folded Reload


        


More information about the llvm-commits mailing list