[llvm] [RISCV] Allow VCIX with SE to reorder (PR #77049)

Brandon Wu via llvm-commits llvm-commits at lists.llvm.org
Thu Jan 4 22:33:48 PST 2024


https://github.com/4vtomat created https://github.com/llvm/llvm-project/pull/77049

This patch sets HasSideEffect to 0, which allows VCIX instructions
that have side effects to be reordered.


>From 28b8a47e949e4eee2408789efe03cc1ca06a9363 Mon Sep 17 00:00:00 2001
From: Brandon Wu <brandon.wu at sifive.com>
Date: Thu, 4 Jan 2024 21:44:34 -0800
Subject: [PATCH] [RISCV] Allow VCIX with SE to reorder

This patch sets HasSideEffect to 0, which allows VCIX instructions
that have side effects to be reordered.
---
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp |  292 ++++-
 llvm/lib/Target/RISCV/RISCVISelLowering.h   |   71 ++
 llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td  |  229 +++-
 llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp |    3 +
 llvm/lib/Target/RISCV/RISCVRegisterInfo.td  |    3 +
 llvm/test/CodeGen/RISCV/pr69586.ll          | 1267 ++++++++++---------
 6 files changed, 1169 insertions(+), 696 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index bc4b2b022c0ae9..512c7b340cb47b 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -8308,25 +8308,63 @@ static SDValue lowerGetVectorLength(SDNode *N, SelectionDAG &DAG,
   return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), Res);
 }
 
-static void getVCIXOperands(SDValue &Op, SelectionDAG &DAG,
-                            SmallVector<SDValue> &Ops) {
+static inline void promoteVCIXScalar(const SDValue &Op,
+                                     SmallVectorImpl<SDValue> &Operands,
+                                     SelectionDAG &DAG) {
+  const RISCVSubtarget &Subtarget =
+      DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
+
+  bool HasChain = Op.getOpcode() == ISD::INTRINSIC_VOID ||
+                  Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
+  unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
   SDLoc DL(Op);
 
+  const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
+      RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
+  if (!II || !II->hasScalarOperand())
+    return;
+
+  unsigned SplatOp = II->ScalarOperand + 1;
+  assert(SplatOp < Op.getNumOperands());
+
+  SDValue &ScalarOp = Operands[SplatOp];
+  MVT OpVT = ScalarOp.getSimpleValueType();
+  MVT XLenVT = Subtarget.getXLenVT();
+
+  // The code below is partially copied from lowerVectorIntrinsicScalars.
+  // If this isn't a scalar, or its type is XLenVT we're done.
+  if (!OpVT.isScalarInteger() || OpVT == XLenVT)
+    return;
+
+  // Manually emit promote operation for scalar operation.
+  if (OpVT.bitsLT(XLenVT)) {
+    unsigned ExtOpc =
+        isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
+    ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp);
+  }
+
+  return;
+}
+
+static void processVCIXOperands(SDValue &OrigOp,
+                                SmallVectorImpl<SDValue> &Operands,
+                                SelectionDAG &DAG) {
+  promoteVCIXScalar(OrigOp, Operands, DAG);
   const RISCVSubtarget &Subtarget =
       DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
-  for (const SDValue &V : Op->op_values()) {
+  for (SDValue &V : Operands) {
     EVT ValType = V.getValueType();
-    if (ValType.isScalableVector() && ValType.isFloatingPoint()) {
+    if (ValType.isVector() && ValType.isFloatingPoint()) {
       MVT InterimIVT =
           MVT::getVectorVT(MVT::getIntegerVT(ValType.getScalarSizeInBits()),
                            ValType.getVectorElementCount());
-      Ops.push_back(DAG.getBitcast(InterimIVT, V));
-    } else if (ValType.isFixedLengthVector()) {
+      V = DAG.getBitcast(InterimIVT, V);
+    }
+    if (ValType.isFixedLengthVector()) {
       MVT OpContainerVT = getContainerForFixedLengthVector(
           DAG, V.getSimpleValueType(), Subtarget);
-      Ops.push_back(convertToScalableVector(OpContainerVT, V, DAG, Subtarget));
-    } else
-      Ops.push_back(V);
+      V = convertToScalableVector(OpContainerVT, V, DAG, Subtarget);
+    }
   }
 }
 
@@ -8557,8 +8595,8 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
   case Intrinsic::riscv_sf_vc_v_fvw: {
     MVT VT = Op.getSimpleValueType();
 
-    SmallVector<SDValue> Ops;
-    getVCIXOperands(Op, DAG, Ops);
+    SmallVector<SDValue> Operands{Op->op_values()};
+    processVCIXOperands(Op, Operands, DAG);
 
     MVT RetVT = VT;
     if (VT.isFixedLengthVector())
@@ -8567,7 +8605,7 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
       RetVT = MVT::getVectorVT(MVT::getIntegerVT(VT.getScalarSizeInBits()),
                                VT.getVectorElementCount());
 
-    SDValue NewNode = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, RetVT, Ops);
+    SDValue NewNode = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, RetVT, Operands);
 
     if (VT.isFixedLengthVector())
       NewNode = convertFromScalableVector(VT, NewNode, DAG, Subtarget);
@@ -8584,6 +8622,52 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
   return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
 }
 
+static inline SDValue getVCIXISDNodeWCHAIN(SDValue &Op, SelectionDAG &DAG,
+                                           unsigned Type) {
+  SDLoc DL(Op);
+  SmallVector<SDValue> Operands{Op->op_values()};
+  Operands.erase(Operands.begin() + 1);
+
+  const RISCVSubtarget &Subtarget =
+      DAG.getMachineFunction().getSubtarget<RISCVSubtarget>();
+  MVT VT = Op.getSimpleValueType();
+  MVT RetVT = VT;
+  MVT FloatVT = VT;
+
+  if (VT.isFloatingPoint()) {
+    RetVT = MVT::getVectorVT(MVT::getIntegerVT(VT.getScalarSizeInBits()),
+                             VT.getVectorElementCount());
+    FloatVT = RetVT;
+  }
+  if (VT.isFixedLengthVector())
+    RetVT = getContainerForFixedLengthVector(DAG.getTargetLoweringInfo(), RetVT,
+                                             Subtarget);
+
+  processVCIXOperands(Op, Operands, DAG);
+
+  SDVTList VTs = DAG.getVTList({RetVT, MVT::Other});
+  SDValue NewNode = DAG.getNode(Type, DL, VTs, Operands);
+  SDValue Chain = NewNode.getValue(1);
+
+  if (VT.isFixedLengthVector())
+    NewNode = convertFromScalableVector(FloatVT, NewNode, DAG, Subtarget);
+  if (VT.isFloatingPoint())
+    NewNode = DAG.getBitcast(VT, NewNode);
+
+  NewNode = DAG.getMergeValues({NewNode, Chain}, DL);
+
+  return NewNode;
+}
+
+static inline SDValue getVCIXISDNodeVOID(SDValue &Op, SelectionDAG &DAG,
+                                         unsigned Type) {
+  SmallVector<SDValue> Operands{Op->op_values()};
+  Operands.erase(Operands.begin() + 1);
+  processVCIXOperands(Op, Operands, DAG);
+
+  return DAG.getNode(Type, SDLoc(Op), Op.getValueType(), Operands);
+}
+
 SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
                                                     SelectionDAG &DAG) const {
   unsigned IntNo = Op.getConstantOperandVal(1);
@@ -8701,48 +8785,33 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
     return DAG.getMergeValues(Results, DL);
   }
   case Intrinsic::riscv_sf_vc_v_x_se:
+    return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_X_SE);
   case Intrinsic::riscv_sf_vc_v_i_se:
+    return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_I_SE);
   case Intrinsic::riscv_sf_vc_v_xv_se:
+    return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_XV_SE);
   case Intrinsic::riscv_sf_vc_v_iv_se:
+    return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_IV_SE);
   case Intrinsic::riscv_sf_vc_v_vv_se:
+    return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_VV_SE);
   case Intrinsic::riscv_sf_vc_v_fv_se:
+    return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_FV_SE);
   case Intrinsic::riscv_sf_vc_v_xvv_se:
+    return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_XVV_SE);
   case Intrinsic::riscv_sf_vc_v_ivv_se:
+    return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_IVV_SE);
   case Intrinsic::riscv_sf_vc_v_vvv_se:
+    return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_VVV_SE);
   case Intrinsic::riscv_sf_vc_v_fvv_se:
+    return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_FVV_SE);
   case Intrinsic::riscv_sf_vc_v_xvw_se:
+    return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_XVW_SE);
   case Intrinsic::riscv_sf_vc_v_ivw_se:
+    return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_IVW_SE);
   case Intrinsic::riscv_sf_vc_v_vvw_se:
-  case Intrinsic::riscv_sf_vc_v_fvw_se: {
-    MVT VT = Op.getSimpleValueType();
-    SDLoc DL(Op);
-    SmallVector<SDValue> Ops;
-    getVCIXOperands(Op, DAG, Ops);
-
-    MVT RetVT = VT;
-    if (VT.isFixedLengthVector())
-      RetVT = getContainerForFixedLengthVector(VT);
-    else if (VT.isFloatingPoint())
-      RetVT = MVT::getVectorVT(MVT::getIntegerVT(RetVT.getScalarSizeInBits()),
-                               RetVT.getVectorElementCount());
-
-    SDVTList VTs = DAG.getVTList({RetVT, MVT::Other});
-    SDValue NewNode = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops);
-
-    if (VT.isFixedLengthVector()) {
-      SDValue FixedVector =
-          convertFromScalableVector(VT, NewNode, DAG, Subtarget);
-      NewNode = DAG.getMergeValues({FixedVector, NewNode.getValue(1)}, DL);
-    } else if (VT.isFloatingPoint()) {
-      SDValue BitCast = DAG.getBitcast(VT, NewNode.getValue(0));
-      NewNode = DAG.getMergeValues({BitCast, NewNode.getValue(1)}, DL);
-    }
-
-    if (Op == NewNode)
-      break;
-
-    return NewNode;
-  }
+    return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_VVW_SE);
+  case Intrinsic::riscv_sf_vc_v_fvw_se:
+    return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_FVW_SE);
   }
 
   return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
@@ -8832,72 +8901,117 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
         FixedIntrinsic->getMemoryVT(), FixedIntrinsic->getMemOperand());
   }
   case Intrinsic::riscv_sf_vc_x_se_e8mf8:
+    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_X_SE_E8MF8);
   case Intrinsic::riscv_sf_vc_x_se_e8mf4:
+    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_X_SE_E8MF4);
   case Intrinsic::riscv_sf_vc_x_se_e8mf2:
+    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_X_SE_E8MF2);
   case Intrinsic::riscv_sf_vc_x_se_e8m1:
+    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_X_SE_E8M1);
   case Intrinsic::riscv_sf_vc_x_se_e8m2:
+    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_X_SE_E8M2);
   case Intrinsic::riscv_sf_vc_x_se_e8m4:
+    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_X_SE_E8M4);
   case Intrinsic::riscv_sf_vc_x_se_e8m8:
+    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_X_SE_E8M8);
   case Intrinsic::riscv_sf_vc_x_se_e16mf4:
+    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_X_SE_E16MF4);
   case Intrinsic::riscv_sf_vc_x_se_e16mf2:
+    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_X_SE_E16MF2);
   case Intrinsic::riscv_sf_vc_x_se_e16m1:
+    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_X_SE_E16M1);
   case Intrinsic::riscv_sf_vc_x_se_e16m2:
+    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_X_SE_E16M2);
   case Intrinsic::riscv_sf_vc_x_se_e16m4:
+    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_X_SE_E16M4);
   case Intrinsic::riscv_sf_vc_x_se_e16m8:
+    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_X_SE_E16M8);
   case Intrinsic::riscv_sf_vc_x_se_e32mf2:
+    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_X_SE_E32MF2);
   case Intrinsic::riscv_sf_vc_x_se_e32m1:
+    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_X_SE_E32M1);
   case Intrinsic::riscv_sf_vc_x_se_e32m2:
+    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_X_SE_E32M2);
   case Intrinsic::riscv_sf_vc_x_se_e32m4:
+    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_X_SE_E32M4);
   case Intrinsic::riscv_sf_vc_x_se_e32m8:
+    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_X_SE_E32M8);
   case Intrinsic::riscv_sf_vc_x_se_e64m1:
+    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_X_SE_E64M1);
   case Intrinsic::riscv_sf_vc_x_se_e64m2:
+    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_X_SE_E64M2);
   case Intrinsic::riscv_sf_vc_x_se_e64m4:
+    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_X_SE_E64M4);
   case Intrinsic::riscv_sf_vc_x_se_e64m8:
+    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_X_SE_E64M8);
   case Intrinsic::riscv_sf_vc_i_se_e8mf8:
+    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_I_SE_E8MF8);
   case Intrinsic::riscv_sf_vc_i_se_e8mf4:
+    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_I_SE_E8MF4);
   case Intrinsic::riscv_sf_vc_i_se_e8mf2:
+    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_I_SE_E8MF2);
   case Intrinsic::riscv_sf_vc_i_se_e8m1:
+    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_I_SE_E8M1);
   case Intrinsic::riscv_sf_vc_i_se_e8m2:
+    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_I_SE_E8M2);
   case Intrinsic::riscv_sf_vc_i_se_e8m4:
+    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_I_SE_E8M4);
   case Intrinsic::riscv_sf_vc_i_se_e8m8:
+    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_I_SE_E8M8);
   case Intrinsic::riscv_sf_vc_i_se_e16mf4:
+    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_I_SE_E16MF4);
   case Intrinsic::riscv_sf_vc_i_se_e16mf2:
+    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_I_SE_E16MF2);
   case Intrinsic::riscv_sf_vc_i_se_e16m1:
+    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_I_SE_E16M1);
   case Intrinsic::riscv_sf_vc_i_se_e16m2:
+    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_I_SE_E16M2);
   case Intrinsic::riscv_sf_vc_i_se_e16m4:
+    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_I_SE_E16M4);
   case Intrinsic::riscv_sf_vc_i_se_e16m8:
+    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_I_SE_E16M8);
   case Intrinsic::riscv_sf_vc_i_se_e32mf2:
+    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_I_SE_E32MF2);
   case Intrinsic::riscv_sf_vc_i_se_e32m1:
+    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_I_SE_E32M1);
   case Intrinsic::riscv_sf_vc_i_se_e32m2:
+    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_I_SE_E32M2);
   case Intrinsic::riscv_sf_vc_i_se_e32m4:
+    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_I_SE_E32M4);
   case Intrinsic::riscv_sf_vc_i_se_e32m8:
+    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_I_SE_E32M8);
   case Intrinsic::riscv_sf_vc_i_se_e64m1:
+    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_I_SE_E64M1);
   case Intrinsic::riscv_sf_vc_i_se_e64m2:
+    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_I_SE_E64M2);
   case Intrinsic::riscv_sf_vc_i_se_e64m4:
+    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_I_SE_E64M4);
   case Intrinsic::riscv_sf_vc_i_se_e64m8:
+    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_I_SE_E64M8);
   case Intrinsic::riscv_sf_vc_xv_se:
+    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XV_SE);
   case Intrinsic::riscv_sf_vc_iv_se:
+    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_IV_SE);
   case Intrinsic::riscv_sf_vc_vv_se:
+    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_VV_SE);
   case Intrinsic::riscv_sf_vc_fv_se:
+    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_FV_SE);
   case Intrinsic::riscv_sf_vc_xvv_se:
+    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XVV_SE);
   case Intrinsic::riscv_sf_vc_ivv_se:
+    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_IVV_SE);
   case Intrinsic::riscv_sf_vc_vvv_se:
+    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_VVV_SE);
   case Intrinsic::riscv_sf_vc_fvv_se:
+    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_FVV_SE);
   case Intrinsic::riscv_sf_vc_xvw_se:
+    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_XVW_SE);
   case Intrinsic::riscv_sf_vc_ivw_se:
+    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_IVW_SE);
   case Intrinsic::riscv_sf_vc_vvw_se:
-  case Intrinsic::riscv_sf_vc_fvw_se: {
-    SmallVector<SDValue> Ops;
-    getVCIXOperands(Op, DAG, Ops);
-
-    SDValue NewNode =
-        DAG.getNode(ISD::INTRINSIC_VOID, SDLoc(Op), Op->getVTList(), Ops);
-
-    if (Op == NewNode)
-      break;
-
-    return NewNode;
-  }
+    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_VVW_SE);
+  case Intrinsic::riscv_sf_vc_fvw_se:
+    return getVCIXISDNodeVOID(Op, DAG, RISCVISD::SF_VC_FVW_SE);
   }
 
   return lowerVectorIntrinsicScalars(Op, DAG, Subtarget);
@@ -18713,6 +18827,76 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
   NODE_NAME_CASE(SWAP_CSR)
   NODE_NAME_CASE(CZERO_EQZ)
   NODE_NAME_CASE(CZERO_NEZ)
+  NODE_NAME_CASE(SF_VC_X_SE_E8MF8)
+  NODE_NAME_CASE(SF_VC_X_SE_E8MF4)
+  NODE_NAME_CASE(SF_VC_X_SE_E8MF2)
+  NODE_NAME_CASE(SF_VC_X_SE_E8M1)
+  NODE_NAME_CASE(SF_VC_X_SE_E8M2)
+  NODE_NAME_CASE(SF_VC_X_SE_E8M4)
+  NODE_NAME_CASE(SF_VC_X_SE_E8M8)
+  NODE_NAME_CASE(SF_VC_X_SE_E16MF4)
+  NODE_NAME_CASE(SF_VC_X_SE_E16MF2)
+  NODE_NAME_CASE(SF_VC_X_SE_E16M1)
+  NODE_NAME_CASE(SF_VC_X_SE_E16M2)
+  NODE_NAME_CASE(SF_VC_X_SE_E16M4)
+  NODE_NAME_CASE(SF_VC_X_SE_E16M8)
+  NODE_NAME_CASE(SF_VC_X_SE_E32MF2)
+  NODE_NAME_CASE(SF_VC_X_SE_E32M1)
+  NODE_NAME_CASE(SF_VC_X_SE_E32M2)
+  NODE_NAME_CASE(SF_VC_X_SE_E32M4)
+  NODE_NAME_CASE(SF_VC_X_SE_E32M8)
+  NODE_NAME_CASE(SF_VC_X_SE_E64M1)
+  NODE_NAME_CASE(SF_VC_X_SE_E64M2)
+  NODE_NAME_CASE(SF_VC_X_SE_E64M4)
+  NODE_NAME_CASE(SF_VC_X_SE_E64M8)
+  NODE_NAME_CASE(SF_VC_I_SE_E8MF8)
+  NODE_NAME_CASE(SF_VC_I_SE_E8MF4)
+  NODE_NAME_CASE(SF_VC_I_SE_E8MF2)
+  NODE_NAME_CASE(SF_VC_I_SE_E8M1)
+  NODE_NAME_CASE(SF_VC_I_SE_E8M2)
+  NODE_NAME_CASE(SF_VC_I_SE_E8M4)
+  NODE_NAME_CASE(SF_VC_I_SE_E8M8)
+  NODE_NAME_CASE(SF_VC_I_SE_E16MF4)
+  NODE_NAME_CASE(SF_VC_I_SE_E16MF2)
+  NODE_NAME_CASE(SF_VC_I_SE_E16M1)
+  NODE_NAME_CASE(SF_VC_I_SE_E16M2)
+  NODE_NAME_CASE(SF_VC_I_SE_E16M4)
+  NODE_NAME_CASE(SF_VC_I_SE_E16M8)
+  NODE_NAME_CASE(SF_VC_I_SE_E32MF2)
+  NODE_NAME_CASE(SF_VC_I_SE_E32M1)
+  NODE_NAME_CASE(SF_VC_I_SE_E32M2)
+  NODE_NAME_CASE(SF_VC_I_SE_E32M4)
+  NODE_NAME_CASE(SF_VC_I_SE_E32M8)
+  NODE_NAME_CASE(SF_VC_I_SE_E64M1)
+  NODE_NAME_CASE(SF_VC_I_SE_E64M2)
+  NODE_NAME_CASE(SF_VC_I_SE_E64M4)
+  NODE_NAME_CASE(SF_VC_I_SE_E64M8)
+  NODE_NAME_CASE(SF_VC_XV_SE)
+  NODE_NAME_CASE(SF_VC_IV_SE)
+  NODE_NAME_CASE(SF_VC_VV_SE)
+  NODE_NAME_CASE(SF_VC_FV_SE)
+  NODE_NAME_CASE(SF_VC_XVV_SE)
+  NODE_NAME_CASE(SF_VC_IVV_SE)
+  NODE_NAME_CASE(SF_VC_VVV_SE)
+  NODE_NAME_CASE(SF_VC_FVV_SE)
+  NODE_NAME_CASE(SF_VC_XVW_SE)
+  NODE_NAME_CASE(SF_VC_IVW_SE)
+  NODE_NAME_CASE(SF_VC_VVW_SE)
+  NODE_NAME_CASE(SF_VC_FVW_SE)
+  NODE_NAME_CASE(SF_VC_V_X_SE)
+  NODE_NAME_CASE(SF_VC_V_I_SE)
+  NODE_NAME_CASE(SF_VC_V_XV_SE)
+  NODE_NAME_CASE(SF_VC_V_IV_SE)
+  NODE_NAME_CASE(SF_VC_V_VV_SE)
+  NODE_NAME_CASE(SF_VC_V_FV_SE)
+  NODE_NAME_CASE(SF_VC_V_XVV_SE)
+  NODE_NAME_CASE(SF_VC_V_IVV_SE)
+  NODE_NAME_CASE(SF_VC_V_VVV_SE)
+  NODE_NAME_CASE(SF_VC_V_FVV_SE)
+  NODE_NAME_CASE(SF_VC_V_XVW_SE)
+  NODE_NAME_CASE(SF_VC_V_IVW_SE)
+  NODE_NAME_CASE(SF_VC_V_VVW_SE)
+  NODE_NAME_CASE(SF_VC_V_FVW_SE)
   }
   // clang-format on
   return nullptr;
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index 18f58057558166..97360a8ae9e5f2 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -410,6 +410,77 @@ enum NodeType : unsigned {
   STRICT_VFROUND_NOEXCEPT_VL,
   LAST_RISCV_STRICTFP_OPCODE = STRICT_VFROUND_NOEXCEPT_VL,
 
+  SF_VC_X_SE_E8MF8,
+  SF_VC_X_SE_E8MF4,
+  SF_VC_X_SE_E8MF2,
+  SF_VC_X_SE_E8M1,
+  SF_VC_X_SE_E8M2,
+  SF_VC_X_SE_E8M4,
+  SF_VC_X_SE_E8M8,
+  SF_VC_X_SE_E16MF4,
+  SF_VC_X_SE_E16MF2,
+  SF_VC_X_SE_E16M1,
+  SF_VC_X_SE_E16M2,
+  SF_VC_X_SE_E16M4,
+  SF_VC_X_SE_E16M8,
+  SF_VC_X_SE_E32MF2,
+  SF_VC_X_SE_E32M1,
+  SF_VC_X_SE_E32M2,
+  SF_VC_X_SE_E32M4,
+  SF_VC_X_SE_E32M8,
+  SF_VC_X_SE_E64M1,
+  SF_VC_X_SE_E64M2,
+  SF_VC_X_SE_E64M4,
+  SF_VC_X_SE_E64M8,
+  SF_VC_I_SE_E8MF8,
+  SF_VC_I_SE_E8MF4,
+  SF_VC_I_SE_E8MF2,
+  SF_VC_I_SE_E8M1,
+  SF_VC_I_SE_E8M2,
+  SF_VC_I_SE_E8M4,
+  SF_VC_I_SE_E8M8,
+  SF_VC_I_SE_E16MF4,
+  SF_VC_I_SE_E16MF2,
+  SF_VC_I_SE_E16M1,
+  SF_VC_I_SE_E16M2,
+  SF_VC_I_SE_E16M4,
+  SF_VC_I_SE_E16M8,
+  SF_VC_I_SE_E32MF2,
+  SF_VC_I_SE_E32M1,
+  SF_VC_I_SE_E32M2,
+  SF_VC_I_SE_E32M4,
+  SF_VC_I_SE_E32M8,
+  SF_VC_I_SE_E64M1,
+  SF_VC_I_SE_E64M2,
+  SF_VC_I_SE_E64M4,
+  SF_VC_I_SE_E64M8,
+  SF_VC_XV_SE,
+  SF_VC_IV_SE,
+  SF_VC_VV_SE,
+  SF_VC_FV_SE,
+  SF_VC_XVV_SE,
+  SF_VC_IVV_SE,
+  SF_VC_VVV_SE,
+  SF_VC_FVV_SE,
+  SF_VC_XVW_SE,
+  SF_VC_IVW_SE,
+  SF_VC_VVW_SE,
+  SF_VC_FVW_SE,
+  SF_VC_V_X_SE,
+  SF_VC_V_I_SE,
+  SF_VC_V_XV_SE,
+  SF_VC_V_IV_SE,
+  SF_VC_V_VV_SE,
+  SF_VC_V_FV_SE,
+  SF_VC_V_XVV_SE,
+  SF_VC_V_IVV_SE,
+  SF_VC_V_VVV_SE,
+  SF_VC_V_FVV_SE,
+  SF_VC_V_XVW_SE,
+  SF_VC_V_IVW_SE,
+  SF_VC_V_VVW_SE,
+  SF_VC_V_FVW_SE,
+
   // WARNING: Do not add anything in the end unless you want the node to
   // have memop! In fact, starting from FIRST_TARGET_MEMORY_OPCODE all
   // opcodes will be thought as target memory ops!
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td
index 31f832dfd84cee..d22f98d693b1bf 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td
@@ -221,8 +221,8 @@ let Predicates = [HasVendorXSfvfnrclipxfqf], DecoderNamespace = "XSfvfnrclipxfqf
   def VFNRCLIP_XU_F_QF : CustomSiFiveVFNRCLIP<0b100010, OPFVF, "sf.vfnrclip.xu.f.qf">;
   def VFNRCLIP_X_F_QF : CustomSiFiveVFNRCLIP<0b100011, OPFVF, "sf.vfnrclip.x.f.qf">;
 }
-class VPseudoVC_X<Operand OpClass, DAGOperand RS1Class,
-                  bit HasSideEffect = 1> :
+
+class VPseudoVC_X<Operand OpClass, DAGOperand RS1Class> :
       Pseudo<(outs),
              (ins OpClass:$op1, payload5:$rs2, payload5:$rd, RS1Class:$r1,
                   AVL:$vl, ixlenimm:$sew), []>,
@@ -231,12 +231,11 @@ class VPseudoVC_X<Operand OpClass, DAGOperand RS1Class,
   let mayStore = 0;
   let HasVLOp = 1;
   let HasSEWOp = 1;
-  let hasSideEffects = HasSideEffect;
+  let hasSideEffects = 0;
   let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
 }
 
-class VPseudoVC_XV<Operand OpClass, VReg RS2Class, DAGOperand RS1Class,
-                   bit HasSideEffect = 1> :
+class VPseudoVC_XV<Operand OpClass, VReg RS2Class, DAGOperand RS1Class> :
       Pseudo<(outs),
              (ins OpClass:$op1, payload5:$rd, RS2Class:$rs2, RS1Class:$r1,
                   AVL:$vl, ixlenimm:$sew), []>,
@@ -245,12 +244,12 @@ class VPseudoVC_XV<Operand OpClass, VReg RS2Class, DAGOperand RS1Class,
   let mayStore = 0;
   let HasVLOp = 1;
   let HasSEWOp = 1;
-  let hasSideEffects = HasSideEffect;
+  let hasSideEffects = 0;
   let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
 }
 
 class VPseudoVC_XVV<Operand OpClass, VReg RDClass, VReg RS2Class,
-                    DAGOperand RS1Class, bit HasSideEffect = 1> :
+                    DAGOperand RS1Class> :
       Pseudo<(outs),
              (ins OpClass:$op1, RDClass:$rd, RS2Class:$rs2, RS1Class:$r1,
                   AVL:$vl, ixlenimm:$sew), []>,
@@ -259,12 +258,11 @@ class VPseudoVC_XVV<Operand OpClass, VReg RDClass, VReg RS2Class,
   let mayStore = 0;
   let HasVLOp = 1;
   let HasSEWOp = 1;
-  let hasSideEffects = HasSideEffect;
+  let hasSideEffects = 0;
   let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
 }
 
-class VPseudoVC_V_X<Operand OpClass, VReg RDClass, DAGOperand RS1Class,
-                    bit HasSideEffect = 1> :
+class VPseudoVC_V_X<Operand OpClass, VReg RDClass, DAGOperand RS1Class> :
       Pseudo<(outs RDClass:$rd),
              (ins OpClass:$op1, payload5:$rs2, RS1Class:$r1,
                   AVL:$vl, ixlenimm:$sew), []>,
@@ -273,12 +271,12 @@ class VPseudoVC_V_X<Operand OpClass, VReg RDClass, DAGOperand RS1Class,
   let mayStore = 0;
   let HasVLOp = 1;
   let HasSEWOp = 1;
-  let hasSideEffects = HasSideEffect;
+  let hasSideEffects = 0;
   let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
 }
 
 class VPseudoVC_V_XV<Operand OpClass, VReg RDClass, VReg RS2Class,
-                     DAGOperand RS1Class, bit HasSideEffect = 1> :
+                     DAGOperand RS1Class> :
       Pseudo<(outs RDClass:$rd),
              (ins OpClass:$op1, RS2Class:$rs2, RS1Class:$r1,
                   AVL:$vl, ixlenimm:$sew), []>,
@@ -287,12 +285,12 @@ class VPseudoVC_V_XV<Operand OpClass, VReg RDClass, VReg RS2Class,
   let mayStore = 0;
   let HasVLOp = 1;
   let HasSEWOp = 1;
-  let hasSideEffects = HasSideEffect;
+  let hasSideEffects = 0;
   let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
 }
 
 class VPseudoVC_V_XVV<Operand OpClass, VReg RDClass, VReg RS2Class,
-                      DAGOperand RS1Class, bit HasSideEffect = 1> :
+                      DAGOperand RS1Class> :
       Pseudo<(outs RDClass:$rd),
              (ins OpClass:$op1, RDClass:$rs3, RS2Class:$rs2, RS1Class:$r1,
                   AVL:$vl, ixlenimm:$sew), []>,
@@ -301,44 +299,52 @@ class VPseudoVC_V_XVV<Operand OpClass, VReg RDClass, VReg RS2Class,
   let mayStore = 0;
   let HasVLOp = 1;
   let HasSEWOp = 1;
-  let hasSideEffects = HasSideEffect;
+  let hasSideEffects = 0;
   let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
 }
 
 multiclass VPseudoVC_X<LMULInfo m, DAGOperand RS1Class,
                        Operand OpClass = payload2> {
   let VLMul = m.value in {
-    def "PseudoVC_" # NAME # "_SE_" # m.MX : VPseudoVC_X<OpClass, RS1Class>;
-    def "PseudoVC_V_" # NAME # "_SE_" # m.MX : VPseudoVC_V_X<OpClass, m.vrclass, RS1Class>;
-    def "PseudoVC_V_" # NAME # "_" # m.MX : VPseudoVC_V_X<OpClass, m.vrclass, RS1Class, 0>;
+    let Defs = [VCIX_STATE], Uses = [VCIX_STATE] in {
+      def "PseudoVC_" # NAME # "_SE_" # m.MX : VPseudoVC_X<OpClass, RS1Class>;
+      def "PseudoVC_V_" # NAME # "_SE_" # m.MX : VPseudoVC_V_X<OpClass, m.vrclass, RS1Class>;
+    }
+    def "PseudoVC_V_" # NAME # "_" # m.MX : VPseudoVC_V_X<OpClass, m.vrclass, RS1Class>;
   }
 }
 
 multiclass VPseudoVC_XV<LMULInfo m, DAGOperand RS1Class,
                         Operand OpClass = payload2> {
   let VLMul = m.value in {
-    def "PseudoVC_" # NAME # "_SE_" # m.MX : VPseudoVC_XV<OpClass, m.vrclass, RS1Class>;
-    def "PseudoVC_V_" # NAME # "_SE_" # m.MX : VPseudoVC_V_XV<OpClass, m.vrclass, m.vrclass, RS1Class>;
-    def "PseudoVC_V_" # NAME # "_" # m.MX : VPseudoVC_V_XV<OpClass, m.vrclass, m.vrclass, RS1Class, 0>;
+    let Defs = [VCIX_STATE], Uses = [VCIX_STATE] in {
+      def "PseudoVC_" # NAME # "_SE_" # m.MX : VPseudoVC_XV<OpClass, m.vrclass, RS1Class>;
+      def "PseudoVC_V_" # NAME # "_SE_" # m.MX : VPseudoVC_V_XV<OpClass, m.vrclass, m.vrclass, RS1Class>;
+    }
+    def "PseudoVC_V_" # NAME # "_" # m.MX : VPseudoVC_V_XV<OpClass, m.vrclass, m.vrclass, RS1Class>;
   }
 }
 
 multiclass VPseudoVC_XVV<LMULInfo m, DAGOperand RS1Class,
                          Operand OpClass = payload2> {
   let VLMul = m.value in {
-    def "PseudoVC_" # NAME # "_SE_" # m.MX : VPseudoVC_XVV<OpClass, m.vrclass, m.vrclass, RS1Class>;
-    def "PseudoVC_V_" # NAME # "_SE_" # m.MX : VPseudoVC_V_XVV<OpClass, m.vrclass, m.vrclass, RS1Class>;
-    def "PseudoVC_V_" # NAME # "_" # m.MX : VPseudoVC_V_XVV<OpClass, m.vrclass, m.vrclass, RS1Class, 0>;
+    let Defs = [VCIX_STATE], Uses = [VCIX_STATE] in {
+      def "PseudoVC_" # NAME # "_SE_" # m.MX : VPseudoVC_XVV<OpClass, m.vrclass, m.vrclass, RS1Class>;
+      def "PseudoVC_V_" # NAME # "_SE_" # m.MX : VPseudoVC_V_XVV<OpClass, m.vrclass, m.vrclass, RS1Class>;
+    }
+    def "PseudoVC_V_" # NAME # "_" # m.MX : VPseudoVC_V_XVV<OpClass, m.vrclass, m.vrclass, RS1Class>;
   }
 }
 
 multiclass VPseudoVC_XVW<LMULInfo m, DAGOperand RS1Class,
                          Operand OpClass = payload2> {
   let VLMul = m.value in {
+    let Defs = [VCIX_STATE], Uses = [VCIX_STATE] in
     def "PseudoVC_" # NAME # "_SE_" # m.MX : VPseudoVC_XVV<OpClass, m.wvrclass, m.vrclass, RS1Class>;
     let Constraints = "@earlyclobber $rd, $rd = $rs3" in {
+      let Defs = [VCIX_STATE], Uses = [VCIX_STATE] in
       def "PseudoVC_V_" # NAME # "_SE_" # m.MX : VPseudoVC_V_XVV<OpClass, m.wvrclass, m.vrclass, RS1Class>;
-      def "PseudoVC_V_" # NAME # "_" # m.MX : VPseudoVC_V_XVV<OpClass, m.wvrclass, m.vrclass, RS1Class, 0>;
+      def "PseudoVC_V_" # NAME # "_" # m.MX : VPseudoVC_V_XVV<OpClass, m.wvrclass, m.vrclass, RS1Class>;
     }
   }
 }
@@ -428,6 +434,149 @@ let Predicates = [HasVendorXSfvfnrclipxfqf] in {
   defm VFNRCLIP_X_F_QF : VPseudoSiFiveVFNRCLIP;
 }
 
+// SDNode
+def SDT_SF_VC_X : SDTypeProfile<0, 5, [SDTCisSameAs<0, 1>,
+                                       SDTCisVT<0, XLenVT>,
+                                       SDTCisSameAs<0, 2>,
+                                       SDTCisSameAs<0, 3>,
+                                       SDTCisSameAs<0, 4>]>;
+
+def SDT_SF_VC_V_X : SDTypeProfile<1, 4, [SDTCisVec<0>,
+                                         SDTCisVT<1, XLenVT>,
+                                         SDTCisSameAs<1, 2>,
+                                         SDTCisSameAs<1, 3>,
+                                         SDTCisSameAs<1, 4>]>;
+
+def SDT_SF_VC_XV : SDTypeProfile<0, 5, [SDTCisSameAs<0, 1>,
+                                        SDTCisVec<2>,
+                                        SDTCisSameAs<0, 4>,
+                                        SDTCisVT<0, XLenVT>]>;
+
+def SDT_SF_VC_V_XV : SDTypeProfile<1, 4, [SDTCisVec<0>,
+                                          SDTCisVT<1, XLenVT>,
+                                          SDTCisSameAs<0, 2>,
+                                          SDTCisSameAs<1, 4>]>;
+
+def SDT_SF_VC_XVV : SDTypeProfile<0, 5, [SDTCisVT<0, XLenVT>,
+                                         SDTCisVec<1>,
+                                         SDTCisSameAs<1, 2>,
+                                         SDTCisSameAs<0, 4>]>;
+
+def SDT_SF_VC_V_XVV : SDTypeProfile<1, 5, [SDTCisVec<0>,
+                                           SDTCisVT<1, XLenVT>,
+                                           SDTCisSameAs<0, 2>,
+                                           SDTCisSameAs<0, 3>,
+                                           SDTCisSameAs<1, 5>]>;
+
+def SDT_SF_VC_XVW : SDTypeProfile<0, 5, [SDTCisVT<0, XLenVT>,
+                                         SDTCisVec<1>, SDTCisVec<2>,
+                                         SDTCisSameAs<0, 4>]>;
+
+def SDT_SF_VC_V_XVW : SDTypeProfile<1, 5, [SDTCisVec<0>,
+                                           SDTCisVT<1, XLenVT>,
+                                           SDTCisSameAs<0, 2>,
+                                           SDTCisVec<3>,
+                                           SDTCisSameAs<1, 5>]>;
+
+foreach vti = AllIntegerVectors in {
+  def sf_vc_x_e#vti.SEW#!tolower(vti.LMul.MX) : SDNode<"RISCVISD::SF_VC_X_SE_E"#vti.SEW#vti.LMul.MX, SDT_SF_VC_X, [SDNPHasChain]>;
+  def sf_vc_i_e#vti.SEW#!tolower(vti.LMul.MX) : SDNode<"RISCVISD::SF_VC_I_SE_E"#vti.SEW#vti.LMul.MX, SDT_SF_VC_X, [SDNPHasChain]>;
+}
+def sf_vc_v_x_se : SDNode<"RISCVISD::SF_VC_V_X_SE", SDT_SF_VC_V_X, [SDNPHasChain]>;
+def sf_vc_v_i_se : SDNode<"RISCVISD::SF_VC_V_I_SE", SDT_SF_VC_V_X, [SDNPHasChain]>;
+def sf_vc_vv_se : SDNode<"RISCVISD::SF_VC_VV_SE", SDT_SF_VC_XV, [SDNPHasChain]>;
+def sf_vc_xv_se : SDNode<"RISCVISD::SF_VC_XV_SE", SDT_SF_VC_XV, [SDNPHasChain]>;
+def sf_vc_iv_se : SDNode<"RISCVISD::SF_VC_IV_SE", SDT_SF_VC_XV, [SDNPHasChain]>;
+def sf_vc_fv_se : SDNode<"RISCVISD::SF_VC_FV_SE", SDT_SF_VC_XV, [SDNPHasChain]>;
+def sf_vc_v_vv_se : SDNode<"RISCVISD::SF_VC_V_VV_SE", SDT_SF_VC_V_XV, [SDNPHasChain]>;
+def sf_vc_v_xv_se : SDNode<"RISCVISD::SF_VC_V_XV_SE", SDT_SF_VC_V_XV, [SDNPHasChain]>;
+def sf_vc_v_iv_se : SDNode<"RISCVISD::SF_VC_V_IV_SE", SDT_SF_VC_V_XV, [SDNPHasChain]>;
+def sf_vc_v_fv_se : SDNode<"RISCVISD::SF_VC_V_FV_SE", SDT_SF_VC_V_XV, [SDNPHasChain]>;
+def sf_vc_vvv_se : SDNode<"RISCVISD::SF_VC_VVV_SE", SDT_SF_VC_XVV, [SDNPHasChain]>;
+def sf_vc_xvv_se : SDNode<"RISCVISD::SF_VC_XVV_SE", SDT_SF_VC_XVV, [SDNPHasChain]>;
+def sf_vc_ivv_se : SDNode<"RISCVISD::SF_VC_IVV_SE", SDT_SF_VC_XVV, [SDNPHasChain]>;
+def sf_vc_fvv_se : SDNode<"RISCVISD::SF_VC_FVV_SE", SDT_SF_VC_XVV, [SDNPHasChain]>;
+def sf_vc_v_vvv_se : SDNode<"RISCVISD::SF_VC_V_VVV_SE", SDT_SF_VC_V_XVV, [SDNPHasChain]>;
+def sf_vc_v_xvv_se : SDNode<"RISCVISD::SF_VC_V_XVV_SE", SDT_SF_VC_V_XVV, [SDNPHasChain]>;
+def sf_vc_v_ivv_se : SDNode<"RISCVISD::SF_VC_V_IVV_SE", SDT_SF_VC_V_XVV, [SDNPHasChain]>;
+def sf_vc_v_fvv_se : SDNode<"RISCVISD::SF_VC_V_FVV_SE", SDT_SF_VC_V_XVV, [SDNPHasChain]>;
+def sf_vc_vvw_se : SDNode<"RISCVISD::SF_VC_VVW_SE", SDT_SF_VC_XVW, [SDNPHasChain]>;
+def sf_vc_xvw_se : SDNode<"RISCVISD::SF_VC_XVW_SE", SDT_SF_VC_XVW, [SDNPHasChain]>;
+def sf_vc_ivw_se : SDNode<"RISCVISD::SF_VC_IVW_SE", SDT_SF_VC_XVW, [SDNPHasChain]>;
+def sf_vc_fvw_se : SDNode<"RISCVISD::SF_VC_FVW_SE", SDT_SF_VC_XVW, [SDNPHasChain]>;
+def sf_vc_v_vvw_se : SDNode<"RISCVISD::SF_VC_V_VVW_SE", SDT_SF_VC_V_XVW, [SDNPHasChain]>;
+def sf_vc_v_xvw_se : SDNode<"RISCVISD::SF_VC_V_XVW_SE", SDT_SF_VC_V_XVW, [SDNPHasChain]>;
+def sf_vc_v_ivw_se : SDNode<"RISCVISD::SF_VC_V_IVW_SE", SDT_SF_VC_V_XVW, [SDNPHasChain]>;
+def sf_vc_v_fvw_se : SDNode<"RISCVISD::SF_VC_V_FVW_SE", SDT_SF_VC_V_XVW, [SDNPHasChain]>;
+
+class VPatVC_OP4_ISD<SDPatternOperator op,
+                     string inst,
+                     ValueType op2_type,
+                     ValueType op3_type,
+                     ValueType op4_type,
+                     int sew,
+                     DAGOperand op2_kind,
+                     DAGOperand op3_kind,
+                     DAGOperand op4_kind,
+                     Operand op1_kind = payload2> :
+  Pat<(op
+       (XLenVT   op1_kind:$op1),
+       (op2_type op2_kind:$op2),
+       (op3_type op3_kind:$op3),
+       (op4_type op4_kind:$op4),
+       VLOpFrag),
+      (!cast<Instruction>(inst)
+       (XLenVT   op1_kind:$op1),
+       (op2_type op2_kind:$op2),
+       (op3_type op3_kind:$op3),
+       (op4_type op4_kind:$op4),
+       GPR:$vl, sew)>;
+
+class VPatVC_V_OP4_ISD<SDPatternOperator op,
+                       string inst,
+                       ValueType result_type,
+                       ValueType op2_type,
+                       ValueType op3_type,
+                       ValueType op4_type,
+                       int sew,
+                       DAGOperand op2_kind,
+                       DAGOperand op3_kind,
+                       DAGOperand op4_kind,
+                       Operand op1_kind = payload2> :
+  Pat<(result_type (op
+                    (XLenVT   op1_kind:$op1),
+                    (op2_type op2_kind:$op2),
+                    (op3_type op3_kind:$op3),
+                    (op4_type op4_kind:$op4),
+                    VLOpFrag)),
+                   (!cast<Instruction>(inst)
+                    (XLenVT   op1_kind:$op1),
+                    (op2_type op2_kind:$op2),
+                    (op3_type op3_kind:$op3),
+                    (op4_type op4_kind:$op4),
+                    GPR:$vl, sew)>;
+
+
+class VPatVC_V_OP3_ISD<SDPatternOperator op,
+                       string inst,
+                       ValueType result_type,
+                       ValueType op2_type,
+                       ValueType op3_type,
+                       int sew,
+                       DAGOperand op2_kind,
+                       DAGOperand op3_kind,
+                       Operand op1_kind = payload2> :
+  Pat<(result_type (op
+                    (XLenVT   op1_kind:$op1),
+                    (op2_type op2_kind:$op2),
+                    (op3_type op3_kind:$op3),
+                    VLOpFrag)),
+                   (!cast<Instruction>(inst)
+                    (XLenVT   op1_kind:$op1),
+                    (op2_type op2_kind:$op2),
+                    (op3_type op3_kind:$op3),
+                    GPR:$vl, sew)>;
+
 class VPatVC_OP4<string intrinsic_name,
                  string inst,
                  ValueType op2_type,
@@ -497,14 +646,14 @@ class VPatVC_V_OP3<string intrinsic_name,
 
 multiclass VPatVC_X<string intrinsic_suffix, string instruction_suffix,
                     VTypeInfo vti, ValueType type, DAGOperand kind> {
-  def : VPatVC_OP4<"int_riscv_sf_vc_" # intrinsic_suffix # "_se_e" # vti.SEW # !tolower(vti.LMul.MX),
-                   "PseudoVC_" # instruction_suffix # "_SE_" # vti.LMul.MX,
-                   XLenVT, XLenVT, type, vti.Log2SEW,
-                   payload5, payload5, kind>;
-  def : VPatVC_V_OP3<"int_riscv_sf_vc_v_" # intrinsic_suffix # "_se",
-                     "PseudoVC_V_" # instruction_suffix # "_SE_" # vti.LMul.MX,
-                     vti.Vector, XLenVT, type, vti.Log2SEW,
-                     payload5, kind>;
+  def : VPatVC_OP4_ISD<!cast<SDPatternOperator>("sf_vc_" # intrinsic_suffix # "_e" # vti.SEW # !tolower(vti.LMul.MX)),
+                       "PseudoVC_" # instruction_suffix # "_SE_" # vti.LMul.MX,
+                       XLenVT, XLenVT, type, vti.Log2SEW,
+                       payload5, payload5, kind>;
+  def : VPatVC_V_OP3_ISD<!cast<SDPatternOperator>("sf_vc_v_" # intrinsic_suffix # "_se"),
+                         "PseudoVC_V_" # instruction_suffix # "_SE_" # vti.LMul.MX,
+                         vti.Vector, XLenVT, type, vti.Log2SEW,
+                         payload5, kind>;
   def : VPatVC_V_OP3<"int_riscv_sf_vc_v_" # intrinsic_suffix,
                      "PseudoVC_V_" # instruction_suffix # "_" # vti.LMul.MX,
                      vti.Vector, XLenVT, type, vti.Log2SEW,
@@ -514,14 +663,14 @@ multiclass VPatVC_X<string intrinsic_suffix, string instruction_suffix,
 multiclass VPatVC_XV<string intrinsic_suffix, string instruction_suffix,
                      VTypeInfo vti, ValueType type, DAGOperand kind,
                      Operand op1_kind = payload2> {
-  def : VPatVC_OP4<"int_riscv_sf_vc_" # intrinsic_suffix # "_se",
+  def : VPatVC_OP4_ISD<!cast<SDPatternOperator>("sf_vc_" # intrinsic_suffix # "_se"),
                    "PseudoVC_" # instruction_suffix # "_SE_" # vti.LMul.MX,
                    XLenVT, vti.Vector, type, vti.Log2SEW,
                    payload5, vti.RegClass, kind, op1_kind>;
-  def : VPatVC_V_OP3<"int_riscv_sf_vc_v_" # intrinsic_suffix # "_se",
-                     "PseudoVC_V_" # instruction_suffix # "_SE_" # vti.LMul.MX,
-                     vti.Vector, vti.Vector, type, vti.Log2SEW,
-                     vti.RegClass, kind, op1_kind>;
+  def : VPatVC_V_OP3_ISD<!cast<SDPatternOperator>("sf_vc_v_" # intrinsic_suffix # "_se"),
+                         "PseudoVC_V_" # instruction_suffix # "_SE_" # vti.LMul.MX,
+                         vti.Vector, vti.Vector, type, vti.Log2SEW,
+                         vti.RegClass, kind, op1_kind>;
   def : VPatVC_V_OP3<"int_riscv_sf_vc_v_" # intrinsic_suffix,
                      "PseudoVC_V_" # instruction_suffix # "_" # vti.LMul.MX,
                      vti.Vector, vti.Vector, type, vti.Log2SEW,
@@ -531,11 +680,11 @@ multiclass VPatVC_XV<string intrinsic_suffix, string instruction_suffix,
 multiclass VPatVC_XVV<string intrinsic_suffix, string instruction_suffix,
                       VTypeInfo wti, VTypeInfo vti, ValueType type, DAGOperand kind,
                       Operand op1_kind = payload2> {
-  def : VPatVC_OP4<"int_riscv_sf_vc_" # intrinsic_suffix # "_se",
+  def : VPatVC_OP4_ISD<!cast<SDPatternOperator>("sf_vc_" # intrinsic_suffix # "_se"),
                    "PseudoVC_" # instruction_suffix # "_SE_" # vti.LMul.MX,
                    wti.Vector, vti.Vector, type, vti.Log2SEW,
                    wti.RegClass, vti.RegClass, kind, op1_kind>;
-  def : VPatVC_V_OP4<"int_riscv_sf_vc_v_" # intrinsic_suffix # "_se",
+  def : VPatVC_V_OP4_ISD<!cast<SDPatternOperator>("sf_vc_v_" # intrinsic_suffix # "_se"),
                      "PseudoVC_V_" # instruction_suffix # "_SE_" # vti.LMul.MX,
                      wti.Vector, wti.Vector, vti.Vector, type, vti.Log2SEW,
                      wti.RegClass, vti.RegClass, kind, op1_kind>;
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
index 24f8d600f1eafc..889fab494a53af 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp
@@ -120,6 +120,9 @@ BitVector RISCVRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
   markSuperRegs(Reserved, RISCV::FRM);
   markSuperRegs(Reserved, RISCV::FFLAGS);
 
+  // SiFive VCIX state register.
+  markSuperRegs(Reserved, RISCV::VCIX_STATE);
+
   if (MF.getFunction().getCallingConv() == CallingConv::GRAAL) {
     if (Subtarget.isRVE())
       report_fatal_error("Graal reserved registers do not exist in RVE");
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
index 840fd149d68198..44509039956c21 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
@@ -598,3 +598,6 @@ def FRM    : RISCVReg<0, "frm">;
 
 // Shadow Stack register
 def SSP    : RISCVReg<0, "ssp">;
+
+// Dummy VCIX state register
+def VCIX_STATE : RISCVReg<0, "vcix_state">;
diff --git a/llvm/test/CodeGen/RISCV/pr69586.ll b/llvm/test/CodeGen/RISCV/pr69586.ll
index ef91334c5ff004..2d5fce2ca4970e 100644
--- a/llvm/test/CodeGen/RISCV/pr69586.ll
+++ b/llvm/test/CodeGen/RISCV/pr69586.ll
@@ -7,21 +7,21 @@
 define void @test(ptr %0, ptr %1, i64 %2) {
 ; NOREMAT-LABEL: test:
 ; NOREMAT:       # %bb.0:
-; NOREMAT-NEXT:    addi sp, sp, -368
-; NOREMAT-NEXT:    .cfi_def_cfa_offset 368
-; NOREMAT-NEXT:    sd ra, 360(sp) # 8-byte Folded Spill
-; NOREMAT-NEXT:    sd s0, 352(sp) # 8-byte Folded Spill
-; NOREMAT-NEXT:    sd s1, 344(sp) # 8-byte Folded Spill
-; NOREMAT-NEXT:    sd s2, 336(sp) # 8-byte Folded Spill
-; NOREMAT-NEXT:    sd s3, 328(sp) # 8-byte Folded Spill
-; NOREMAT-NEXT:    sd s4, 320(sp) # 8-byte Folded Spill
-; NOREMAT-NEXT:    sd s5, 312(sp) # 8-byte Folded Spill
-; NOREMAT-NEXT:    sd s6, 304(sp) # 8-byte Folded Spill
-; NOREMAT-NEXT:    sd s7, 296(sp) # 8-byte Folded Spill
-; NOREMAT-NEXT:    sd s8, 288(sp) # 8-byte Folded Spill
-; NOREMAT-NEXT:    sd s9, 280(sp) # 8-byte Folded Spill
-; NOREMAT-NEXT:    sd s10, 272(sp) # 8-byte Folded Spill
-; NOREMAT-NEXT:    sd s11, 264(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    addi sp, sp, -400
+; NOREMAT-NEXT:    .cfi_def_cfa_offset 400
+; NOREMAT-NEXT:    sd ra, 392(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    sd s0, 384(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    sd s1, 376(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    sd s2, 368(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    sd s3, 360(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    sd s4, 352(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    sd s5, 344(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    sd s6, 336(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    sd s7, 328(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    sd s8, 320(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    sd s9, 312(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    sd s10, 304(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    sd s11, 296(sp) # 8-byte Folded Spill
 ; NOREMAT-NEXT:    .cfi_offset ra, -8
 ; NOREMAT-NEXT:    .cfi_offset s0, -16
 ; NOREMAT-NEXT:    .cfi_offset s1, -24
@@ -35,6 +35,11 @@ define void @test(ptr %0, ptr %1, i64 %2) {
 ; NOREMAT-NEXT:    .cfi_offset s9, -88
 ; NOREMAT-NEXT:    .cfi_offset s10, -96
 ; NOREMAT-NEXT:    .cfi_offset s11, -104
+; NOREMAT-NEXT:    csrr a2, vlenb
+; NOREMAT-NEXT:    li a3, 6
+; NOREMAT-NEXT:    mul a2, a2, a3
+; NOREMAT-NEXT:    sub sp, sp, a2
+; NOREMAT-NEXT:    .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x90, 0x03, 0x22, 0x11, 0x06, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 400 + 6 * vlenb
 ; NOREMAT-NEXT:    li a2, 32
 ; NOREMAT-NEXT:    vsetvli zero, a2, e32, m2, ta, ma
 ; NOREMAT-NEXT:    vle32.v v8, (a0)
@@ -50,670 +55,728 @@ define void @test(ptr %0, ptr %1, i64 %2) {
 ; NOREMAT-NEXT:    vle32.v v10, (a2)
 ; NOREMAT-NEXT:    li a2, 1
 ; NOREMAT-NEXT:    slli a2, a2, 11
-; NOREMAT-NEXT:    sd a2, 256(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    sd a2, 272(sp) # 8-byte Folded Spill
 ; NOREMAT-NEXT:    add a2, a0, a2
 ; NOREMAT-NEXT:    vle32.v v12, (a2)
 ; NOREMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
 ; NOREMAT-NEXT:    vle32.v v8, (a2)
-; NOREMAT-NEXT:    li a4, 5
-; NOREMAT-NEXT:    slli a2, a4, 9
-; NOREMAT-NEXT:    sd a2, 248(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    li a5, 5
+; NOREMAT-NEXT:    slli a2, a5, 9
+; NOREMAT-NEXT:    sd a2, 264(sp) # 8-byte Folded Spill
 ; NOREMAT-NEXT:    add a2, a0, a2
 ; NOREMAT-NEXT:    vle32.v v14, (a2)
 ; NOREMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
 ; NOREMAT-NEXT:    vle32.v v10, (a2)
-; NOREMAT-NEXT:    li a5, 3
-; NOREMAT-NEXT:    slli a2, a5, 10
-; NOREMAT-NEXT:    sd a2, 240(sp) # 8-byte Folded Spill
-; NOREMAT-NEXT:    add a2, a0, a2
-; NOREMAT-NEXT:    vle32.v v12, (a2)
+; NOREMAT-NEXT:    li a2, 3
+; NOREMAT-NEXT:    slli a3, a2, 10
+; NOREMAT-NEXT:    sd a3, 256(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    add a3, a0, a3
+; NOREMAT-NEXT:    vle32.v v12, (a3)
 ; NOREMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
-; NOREMAT-NEXT:    vle32.v v8, (a2)
-; NOREMAT-NEXT:    li a3, 7
-; NOREMAT-NEXT:    slli a2, a3, 9
-; NOREMAT-NEXT:    sd a2, 232(sp) # 8-byte Folded Spill
-; NOREMAT-NEXT:    add a2, a0, a2
-; NOREMAT-NEXT:    vle32.v v14, (a2)
+; NOREMAT-NEXT:    vle32.v v8, (a3)
+; NOREMAT-NEXT:    li a4, 7
+; NOREMAT-NEXT:    slli a3, a4, 9
+; NOREMAT-NEXT:    sd a3, 248(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    add a3, a0, a3
+; NOREMAT-NEXT:    vle32.v v14, (a3)
 ; NOREMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
-; NOREMAT-NEXT:    vle32.v v10, (a2)
-; NOREMAT-NEXT:    lui a2, 1
-; NOREMAT-NEXT:    add a2, a0, a2
-; NOREMAT-NEXT:    vle32.v v12, (a2)
+; NOREMAT-NEXT:    vle32.v v10, (a3)
+; NOREMAT-NEXT:    lui a3, 1
+; NOREMAT-NEXT:    add a3, a0, a3
+; NOREMAT-NEXT:    vle32.v v12, (a3)
 ; NOREMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
-; NOREMAT-NEXT:    vle32.v v8, (a2)
-; NOREMAT-NEXT:    li a2, 9
-; NOREMAT-NEXT:    slli a6, a2, 9
-; NOREMAT-NEXT:    sd a6, 224(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    vle32.v v8, (a3)
+; NOREMAT-NEXT:    li a3, 9
+; NOREMAT-NEXT:    slli a6, a3, 9
+; NOREMAT-NEXT:    sd a6, 240(sp) # 8-byte Folded Spill
 ; NOREMAT-NEXT:    add a6, a0, a6
 ; NOREMAT-NEXT:    vle32.v v14, (a6)
 ; NOREMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
 ; NOREMAT-NEXT:    vle32.v v10, (a6)
-; NOREMAT-NEXT:    slli a6, a4, 10
-; NOREMAT-NEXT:    sd a6, 216(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    slli a6, a5, 10
+; NOREMAT-NEXT:    sd a6, 232(sp) # 8-byte Folded Spill
 ; NOREMAT-NEXT:    add a6, a0, a6
 ; NOREMAT-NEXT:    vle32.v v12, (a6)
 ; NOREMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
 ; NOREMAT-NEXT:    vle32.v v8, (a6)
 ; NOREMAT-NEXT:    li s8, 11
 ; NOREMAT-NEXT:    slli a6, s8, 9
-; NOREMAT-NEXT:    sd a6, 208(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    sd a6, 224(sp) # 8-byte Folded Spill
 ; NOREMAT-NEXT:    add a6, a0, a6
 ; NOREMAT-NEXT:    vle32.v v14, (a6)
 ; NOREMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
 ; NOREMAT-NEXT:    vle32.v v10, (a6)
-; NOREMAT-NEXT:    slli a5, a5, 11
-; NOREMAT-NEXT:    sd a5, 200(sp) # 8-byte Folded Spill
-; NOREMAT-NEXT:    add a5, a0, a5
-; NOREMAT-NEXT:    vle32.v v12, (a5)
+; NOREMAT-NEXT:    slli a2, a2, 11
+; NOREMAT-NEXT:    sd a2, 216(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    add a2, a0, a2
+; NOREMAT-NEXT:    vle32.v v12, (a2)
 ; NOREMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
-; NOREMAT-NEXT:    vle32.v v8, (a5)
+; NOREMAT-NEXT:    vle32.v v8, (a2)
 ; NOREMAT-NEXT:    li s2, 13
-; NOREMAT-NEXT:    slli a5, s2, 9
-; NOREMAT-NEXT:    sd a5, 192(sp) # 8-byte Folded Spill
-; NOREMAT-NEXT:    add a5, a0, a5
-; NOREMAT-NEXT:    vle32.v v14, (a5)
+; NOREMAT-NEXT:    slli a2, s2, 9
+; NOREMAT-NEXT:    sd a2, 208(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    add a2, a0, a2
+; NOREMAT-NEXT:    vle32.v v14, (a2)
 ; NOREMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
-; NOREMAT-NEXT:    vle32.v v10, (a5)
-; NOREMAT-NEXT:    slli a5, a3, 10
-; NOREMAT-NEXT:    sd a5, 184(sp) # 8-byte Folded Spill
-; NOREMAT-NEXT:    add a5, a0, a5
-; NOREMAT-NEXT:    vle32.v v12, (a5)
+; NOREMAT-NEXT:    vle32.v v10, (a2)
+; NOREMAT-NEXT:    slli a2, a4, 10
+; NOREMAT-NEXT:    sd a2, 200(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    add a2, a0, a2
+; NOREMAT-NEXT:    vle32.v v12, (a2)
 ; NOREMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
-; NOREMAT-NEXT:    vle32.v v8, (a5)
-; NOREMAT-NEXT:    li t0, 15
-; NOREMAT-NEXT:    slli a5, t0, 9
-; NOREMAT-NEXT:    sd a5, 176(sp) # 8-byte Folded Spill
-; NOREMAT-NEXT:    add a5, a0, a5
-; NOREMAT-NEXT:    vle32.v v14, (a5)
+; NOREMAT-NEXT:    vle32.v v8, (a2)
+; NOREMAT-NEXT:    li a2, 15
+; NOREMAT-NEXT:    slli a6, a2, 9
+; NOREMAT-NEXT:    sd a6, 192(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    add a6, a0, a6
+; NOREMAT-NEXT:    vle32.v v26, (a6)
 ; NOREMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
-; NOREMAT-NEXT:    vle32.v v10, (a5)
-; NOREMAT-NEXT:    lui a5, 2
+; NOREMAT-NEXT:    vle32.v v16, (a6)
+; NOREMAT-NEXT:    lui a6, 2
+; NOREMAT-NEXT:    add a6, a0, a6
+; NOREMAT-NEXT:    vle32.v v28, (a6)
+; NOREMAT-NEXT:    vle32.v v10, (a6)
+; NOREMAT-NEXT:    li a6, 17
+; NOREMAT-NEXT:    slli a6, a6, 9
+; NOREMAT-NEXT:    sd a6, 184(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    li t0, 17
+; NOREMAT-NEXT:    add a6, a0, a6
+; NOREMAT-NEXT:    vle32.v v30, (a6)
+; NOREMAT-NEXT:    vle32.v v18, (a6)
+; NOREMAT-NEXT:    slli a6, a3, 10
+; NOREMAT-NEXT:    sd a6, 176(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    add a6, a0, a6
+; NOREMAT-NEXT:    vle32.v v0, (a6)
+; NOREMAT-NEXT:    vle32.v v20, (a6)
+; NOREMAT-NEXT:    li a6, 19
+; NOREMAT-NEXT:    slli a6, a6, 9
+; NOREMAT-NEXT:    sd a6, 168(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    li a7, 19
+; NOREMAT-NEXT:    add a6, a0, a6
+; NOREMAT-NEXT:    vle32.v v2, (a6)
+; NOREMAT-NEXT:    vle32.v v22, (a6)
+; NOREMAT-NEXT:    slli a5, a5, 11
+; NOREMAT-NEXT:    sd a5, 160(sp) # 8-byte Folded Spill
 ; NOREMAT-NEXT:    add a5, a0, a5
+; NOREMAT-NEXT:    vle32.v v4, (a5)
 ; NOREMAT-NEXT:    vle32.v v12, (a5)
-; NOREMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
-; NOREMAT-NEXT:    vle32.v v8, (a5)
-; NOREMAT-NEXT:    li a5, 17
-; NOREMAT-NEXT:    slli a5, a5, 9
-; NOREMAT-NEXT:    sd a5, 168(sp) # 8-byte Folded Spill
-; NOREMAT-NEXT:    li a7, 17
+; NOREMAT-NEXT:    li s10, 21
+; NOREMAT-NEXT:    slli a5, s10, 9
+; NOREMAT-NEXT:    sd a5, 152(sp) # 8-byte Folded Spill
 ; NOREMAT-NEXT:    add a5, a0, a5
+; NOREMAT-NEXT:    vle32.v v24, (a5)
 ; NOREMAT-NEXT:    vle32.v v14, (a5)
-; NOREMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
-; NOREMAT-NEXT:    vle32.v v10, (a5)
-; NOREMAT-NEXT:    slli a5, a2, 10
-; NOREMAT-NEXT:    sd a5, 160(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v8, v26
+; NOREMAT-NEXT:    slli a5, s8, 10
+; NOREMAT-NEXT:    sd a5, 144(sp) # 8-byte Folded Spill
 ; NOREMAT-NEXT:    add a5, a0, a5
-; NOREMAT-NEXT:    vle32.v v12, (a5)
-; NOREMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
+; NOREMAT-NEXT:    vle32.v v26, (a5)
 ; NOREMAT-NEXT:    vle32.v v8, (a5)
-; NOREMAT-NEXT:    li a5, 19
-; NOREMAT-NEXT:    slli a5, a5, 9
-; NOREMAT-NEXT:    sd a5, 152(sp) # 8-byte Folded Spill
-; NOREMAT-NEXT:    li a6, 19
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v16, v28
+; NOREMAT-NEXT:    li s6, 23
+; NOREMAT-NEXT:    slli a5, s6, 9
+; NOREMAT-NEXT:    sd a5, 136(sp) # 8-byte Folded Spill
 ; NOREMAT-NEXT:    add a5, a0, a5
-; NOREMAT-NEXT:    vle32.v v14, (a5)
-; NOREMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
+; NOREMAT-NEXT:    vle32.v v28, (a5)
+; NOREMAT-NEXT:    vle32.v v16, (a5)
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v10, v30
+; NOREMAT-NEXT:    lui a5, 3
+; NOREMAT-NEXT:    add a5, a0, a5
+; NOREMAT-NEXT:    vle32.v v30, (a5)
 ; NOREMAT-NEXT:    vle32.v v10, (a5)
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v18, v0
+; NOREMAT-NEXT:    li s3, 25
+; NOREMAT-NEXT:    slli a5, s3, 9
+; NOREMAT-NEXT:    sd a5, 128(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    add a5, a0, a5
+; NOREMAT-NEXT:    vle32.v v0, (a5)
+; NOREMAT-NEXT:    vle32.v v18, (a5)
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v20, v2
+; NOREMAT-NEXT:    slli a5, s2, 10
+; NOREMAT-NEXT:    sd a5, 120(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    add a5, a0, a5
+; NOREMAT-NEXT:    vle32.v v2, (a5)
+; NOREMAT-NEXT:    vle32.v v20, (a5)
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v22, v4
+; NOREMAT-NEXT:    li t5, 27
+; NOREMAT-NEXT:    slli a5, t5, 9
+; NOREMAT-NEXT:    sd a5, 112(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    add a5, a0, a5
+; NOREMAT-NEXT:    vle32.v v4, (a5)
+; NOREMAT-NEXT:    vle32.v v22, (a5)
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v12, v24
 ; NOREMAT-NEXT:    slli a4, a4, 11
-; NOREMAT-NEXT:    sd a4, 144(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    sd a4, 104(sp) # 8-byte Folded Spill
 ; NOREMAT-NEXT:    add a4, a0, a4
+; NOREMAT-NEXT:    vle32.v v24, (a4)
 ; NOREMAT-NEXT:    vle32.v v12, (a4)
-; NOREMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
-; NOREMAT-NEXT:    vle32.v v8, (a4)
-; NOREMAT-NEXT:    li s10, 21
-; NOREMAT-NEXT:    slli a4, s10, 9
-; NOREMAT-NEXT:    sd a4, 136(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v14, v26
+; NOREMAT-NEXT:    li t2, 29
+; NOREMAT-NEXT:    slli a4, t2, 9
+; NOREMAT-NEXT:    sd a4, 96(sp) # 8-byte Folded Spill
 ; NOREMAT-NEXT:    add a4, a0, a4
+; NOREMAT-NEXT:    vle32.v v26, (a4)
 ; NOREMAT-NEXT:    vle32.v v14, (a4)
-; NOREMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
-; NOREMAT-NEXT:    vle32.v v10, (a4)
-; NOREMAT-NEXT:    slli a4, s8, 10
-; NOREMAT-NEXT:    sd a4, 128(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v8, v28
+; NOREMAT-NEXT:    slli a4, a2, 10
+; NOREMAT-NEXT:    sd a4, 88(sp) # 8-byte Folded Spill
 ; NOREMAT-NEXT:    add a4, a0, a4
-; NOREMAT-NEXT:    vle32.v v12, (a4)
-; NOREMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
+; NOREMAT-NEXT:    vle32.v v28, (a4)
 ; NOREMAT-NEXT:    vle32.v v8, (a4)
-; NOREMAT-NEXT:    li s6, 23
-; NOREMAT-NEXT:    slli a4, s6, 9
-; NOREMAT-NEXT:    sd a4, 120(sp) # 8-byte Folded Spill
-; NOREMAT-NEXT:    add a4, a0, a4
-; NOREMAT-NEXT:    vle32.v v14, (a4)
-; NOREMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
-; NOREMAT-NEXT:    vle32.v v10, (a4)
-; NOREMAT-NEXT:    lui a4, 3
+; NOREMAT-NEXT:    csrr a4, vlenb
+; NOREMAT-NEXT:    slli a4, a4, 2
+; NOREMAT-NEXT:    add a4, sp, a4
+; NOREMAT-NEXT:    addi a4, a4, 288
+; NOREMAT-NEXT:    vs2r.v v8, (a4) # Unknown-size Folded Spill
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v16, v30
+; NOREMAT-NEXT:    li a5, 31
+; NOREMAT-NEXT:    slli a4, a5, 9
+; NOREMAT-NEXT:    sd a4, 80(sp) # 8-byte Folded Spill
 ; NOREMAT-NEXT:    add a4, a0, a4
-; NOREMAT-NEXT:    vle32.v v12, (a4)
-; NOREMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
+; NOREMAT-NEXT:    vle32.v v30, (a4)
+; NOREMAT-NEXT:    vle32.v v16, (a4)
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v10, v0
+; NOREMAT-NEXT:    lui a6, 4
+; NOREMAT-NEXT:    add a4, a0, a6
+; NOREMAT-NEXT:    vle32.v v0, (a4)
 ; NOREMAT-NEXT:    vle32.v v8, (a4)
-; NOREMAT-NEXT:    li s3, 25
-; NOREMAT-NEXT:    slli a4, s3, 9
-; NOREMAT-NEXT:    sd a4, 112(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    csrr a4, vlenb
+; NOREMAT-NEXT:    slli a4, a4, 1
+; NOREMAT-NEXT:    add a4, sp, a4
+; NOREMAT-NEXT:    addi a4, a4, 288
+; NOREMAT-NEXT:    vs2r.v v8, (a4) # Unknown-size Folded Spill
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v18, v2
+; NOREMAT-NEXT:    addiw a4, a6, 512
+; NOREMAT-NEXT:    sd a4, 72(sp) # 8-byte Folded Spill
 ; NOREMAT-NEXT:    add a4, a0, a4
-; NOREMAT-NEXT:    vle32.v v14, (a4)
-; NOREMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
-; NOREMAT-NEXT:    vle32.v v10, (a4)
-; NOREMAT-NEXT:    slli a4, s2, 10
-; NOREMAT-NEXT:    sd a4, 104(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    vle32.v v2, (a4)
+; NOREMAT-NEXT:    vle32.v v18, (a4)
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v20, v4
+; NOREMAT-NEXT:    slli a4, t0, 10
+; NOREMAT-NEXT:    sd a4, 64(sp) # 8-byte Folded Spill
 ; NOREMAT-NEXT:    add a4, a0, a4
-; NOREMAT-NEXT:    vle32.v v12, (a4)
-; NOREMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
-; NOREMAT-NEXT:    vle32.v v8, (a4)
-; NOREMAT-NEXT:    li t5, 27
-; NOREMAT-NEXT:    slli a4, t5, 9
-; NOREMAT-NEXT:    sd a4, 96(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    vle32.v v4, (a4)
+; NOREMAT-NEXT:    vle32.v v20, (a4)
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v22, v24
+; NOREMAT-NEXT:    addiw a4, a6, 1536
+; NOREMAT-NEXT:    sd a4, 56(sp) # 8-byte Folded Spill
 ; NOREMAT-NEXT:    add a4, a0, a4
-; NOREMAT-NEXT:    vle32.v v14, (a4)
-; NOREMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
-; NOREMAT-NEXT:    vle32.v v10, (a4)
+; NOREMAT-NEXT:    vle32.v v6, (a4)
+; NOREMAT-NEXT:    vle32.v v22, (a4)
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v12, v26
 ; NOREMAT-NEXT:    slli a3, a3, 11
-; NOREMAT-NEXT:    sd a3, 88(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    sd a3, 48(sp) # 8-byte Folded Spill
 ; NOREMAT-NEXT:    add a3, a0, a3
 ; NOREMAT-NEXT:    vle32.v v12, (a3)
-; NOREMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
 ; NOREMAT-NEXT:    vle32.v v8, (a3)
-; NOREMAT-NEXT:    li t2, 29
-; NOREMAT-NEXT:    slli a3, t2, 9
-; NOREMAT-NEXT:    sd a3, 80(sp) # 8-byte Folded Spill
-; NOREMAT-NEXT:    add a3, a0, a3
-; NOREMAT-NEXT:    vle32.v v14, (a3)
-; NOREMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
-; NOREMAT-NEXT:    vle32.v v10, (a3)
-; NOREMAT-NEXT:    slli a3, t0, 10
-; NOREMAT-NEXT:    sd a3, 72(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    addi a3, sp, 288
+; NOREMAT-NEXT:    vs2r.v v8, (a3) # Unknown-size Folded Spill
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v14, v28
+; NOREMAT-NEXT:    lui s1, 5
+; NOREMAT-NEXT:    addiw a3, s1, -1536
+; NOREMAT-NEXT:    sd a3, 40(sp) # 8-byte Folded Spill
 ; NOREMAT-NEXT:    add a3, a0, a3
-; NOREMAT-NEXT:    vle32.v v12, (a3)
-; NOREMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
 ; NOREMAT-NEXT:    vle32.v v8, (a3)
-; NOREMAT-NEXT:    li a5, 31
-; NOREMAT-NEXT:    slli a3, a5, 9
-; NOREMAT-NEXT:    sd a3, 64(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    vle32.v v24, (a3)
+; NOREMAT-NEXT:    csrr a3, vlenb
+; NOREMAT-NEXT:    slli a3, a3, 2
+; NOREMAT-NEXT:    add a3, sp, a3
+; NOREMAT-NEXT:    addi a3, a3, 288
+; NOREMAT-NEXT:    vl2r.v v10, (a3) # Unknown-size Folded Reload
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v10, v30
+; NOREMAT-NEXT:    slli a3, a7, 10
+; NOREMAT-NEXT:    sd a3, 32(sp) # 8-byte Folded Spill
 ; NOREMAT-NEXT:    add a3, a0, a3
-; NOREMAT-NEXT:    vle32.v v14, (a3)
-; NOREMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
 ; NOREMAT-NEXT:    vle32.v v10, (a3)
-; NOREMAT-NEXT:    lui a4, 4
-; NOREMAT-NEXT:    add a3, a0, a4
-; NOREMAT-NEXT:    vle32.v v12, (a3)
-; NOREMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
-; NOREMAT-NEXT:    vle32.v v8, (a3)
-; NOREMAT-NEXT:    addiw a3, a4, 512
-; NOREMAT-NEXT:    sd a3, 56(sp) # 8-byte Folded Spill
-; NOREMAT-NEXT:    add a3, a0, a3
 ; NOREMAT-NEXT:    vle32.v v14, (a3)
-; NOREMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
-; NOREMAT-NEXT:    vle32.v v10, (a3)
-; NOREMAT-NEXT:    slli a3, a7, 10
-; NOREMAT-NEXT:    sd a3, 48(sp) # 8-byte Folded Spill
-; NOREMAT-NEXT:    add a3, a0, a3
-; NOREMAT-NEXT:    vle32.v v12, (a3)
-; NOREMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
-; NOREMAT-NEXT:    vle32.v v8, (a3)
-; NOREMAT-NEXT:    addiw a3, a4, 1536
-; NOREMAT-NEXT:    sd a3, 40(sp) # 8-byte Folded Spill
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v16, v0
+; NOREMAT-NEXT:    addiw a3, s1, -512
+; NOREMAT-NEXT:    sd a3, 24(sp) # 8-byte Folded Spill
 ; NOREMAT-NEXT:    add a3, a0, a3
-; NOREMAT-NEXT:    vle32.v v14, (a3)
-; NOREMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
-; NOREMAT-NEXT:    vle32.v v10, (a3)
-; NOREMAT-NEXT:    slli a2, a2, 11
-; NOREMAT-NEXT:    sd a2, 32(sp) # 8-byte Folded Spill
-; NOREMAT-NEXT:    add a2, a0, a2
-; NOREMAT-NEXT:    vle32.v v12, (a2)
-; NOREMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
-; NOREMAT-NEXT:    vle32.v v8, (a2)
-; NOREMAT-NEXT:    lui s1, 5
-; NOREMAT-NEXT:    addiw a2, s1, -1536
-; NOREMAT-NEXT:    sd a2, 24(sp) # 8-byte Folded Spill
-; NOREMAT-NEXT:    add a2, a0, a2
-; NOREMAT-NEXT:    vle32.v v14, (a2)
-; NOREMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
-; NOREMAT-NEXT:    vle32.v v10, (a2)
-; NOREMAT-NEXT:    slli a2, a6, 10
-; NOREMAT-NEXT:    sd a2, 16(sp) # 8-byte Folded Spill
-; NOREMAT-NEXT:    add a2, a0, a2
-; NOREMAT-NEXT:    vle32.v v12, (a2)
-; NOREMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
-; NOREMAT-NEXT:    vle32.v v8, (a2)
-; NOREMAT-NEXT:    addiw a2, s1, -512
-; NOREMAT-NEXT:    sd a2, 8(sp) # 8-byte Folded Spill
-; NOREMAT-NEXT:    add a2, a0, a2
-; NOREMAT-NEXT:    vle32.v v14, (a2)
-; NOREMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
-; NOREMAT-NEXT:    vle32.v v10, (a2)
-; NOREMAT-NEXT:    add a2, a0, s1
-; NOREMAT-NEXT:    vle32.v v12, (a2)
-; NOREMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
-; NOREMAT-NEXT:    vle32.v v8, (a2)
+; NOREMAT-NEXT:    vle32.v v0, (a3)
+; NOREMAT-NEXT:    vle32.v v16, (a3)
+; NOREMAT-NEXT:    csrr a3, vlenb
+; NOREMAT-NEXT:    slli a3, a3, 1
+; NOREMAT-NEXT:    add a3, sp, a3
+; NOREMAT-NEXT:    addi a3, a3, 288
+; NOREMAT-NEXT:    vl2r.v v26, (a3) # Unknown-size Folded Reload
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v26, v2
+; NOREMAT-NEXT:    add a3, a0, s1
+; NOREMAT-NEXT:    vle32.v v26, (a3)
+; NOREMAT-NEXT:    vle32.v v28, (a3)
+; NOREMAT-NEXT:    csrr a3, vlenb
+; NOREMAT-NEXT:    slli a3, a3, 2
+; NOREMAT-NEXT:    add a3, sp, a3
+; NOREMAT-NEXT:    addi a3, a3, 288
+; NOREMAT-NEXT:    vs2r.v v28, (a3) # Unknown-size Folded Spill
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v18, v4
 ; NOREMAT-NEXT:    addiw ra, s1, 512
-; NOREMAT-NEXT:    add a2, a0, ra
-; NOREMAT-NEXT:    vle32.v v14, (a2)
-; NOREMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
-; NOREMAT-NEXT:    vle32.v v10, (a2)
+; NOREMAT-NEXT:    add a3, a0, ra
+; NOREMAT-NEXT:    vle32.v v28, (a3)
+; NOREMAT-NEXT:    vle32.v v30, (a3)
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v20, v6
 ; NOREMAT-NEXT:    slli s11, s10, 10
-; NOREMAT-NEXT:    add a2, a0, s11
-; NOREMAT-NEXT:    vle32.v v12, (a2)
-; NOREMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
-; NOREMAT-NEXT:    vle32.v v8, (a2)
+; NOREMAT-NEXT:    add a3, a0, s11
+; NOREMAT-NEXT:    vle32.v v2, (a3)
+; NOREMAT-NEXT:    vle32.v v18, (a3)
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v22, v12
 ; NOREMAT-NEXT:    addiw s10, s1, 1536
-; NOREMAT-NEXT:    add a2, a0, s10
-; NOREMAT-NEXT:    vle32.v v14, (a2)
-; NOREMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
-; NOREMAT-NEXT:    vle32.v v10, (a2)
+; NOREMAT-NEXT:    add a3, a0, s10
+; NOREMAT-NEXT:    vle32.v v4, (a3)
+; NOREMAT-NEXT:    vle32.v v20, (a3)
+; NOREMAT-NEXT:    addi a3, sp, 288
+; NOREMAT-NEXT:    vl2r.v v12, (a3) # Unknown-size Folded Reload
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v12, v8
 ; NOREMAT-NEXT:    slli s9, s8, 11
-; NOREMAT-NEXT:    add a2, a0, s9
-; NOREMAT-NEXT:    vle32.v v12, (a2)
-; NOREMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
-; NOREMAT-NEXT:    vle32.v v8, (a2)
-; NOREMAT-NEXT:    lui t1, 6
-; NOREMAT-NEXT:    addiw s8, t1, -1536
-; NOREMAT-NEXT:    add a2, a0, s8
-; NOREMAT-NEXT:    vle32.v v14, (a2)
-; NOREMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
-; NOREMAT-NEXT:    vle32.v v10, (a2)
+; NOREMAT-NEXT:    add a3, a0, s9
+; NOREMAT-NEXT:    vle32.v v6, (a3)
+; NOREMAT-NEXT:    vle32.v v12, (a3)
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v24, v10
+; NOREMAT-NEXT:    lui t0, 6
+; NOREMAT-NEXT:    addiw s8, t0, -1536
+; NOREMAT-NEXT:    add a3, a0, s8
+; NOREMAT-NEXT:    vle32.v v8, (a3)
+; NOREMAT-NEXT:    vle32.v v22, (a3)
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v14, v0
 ; NOREMAT-NEXT:    slli s7, s6, 10
-; NOREMAT-NEXT:    add a2, a0, s7
-; NOREMAT-NEXT:    vle32.v v12, (a2)
-; NOREMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
-; NOREMAT-NEXT:    vle32.v v8, (a2)
-; NOREMAT-NEXT:    addiw s6, t1, -512
-; NOREMAT-NEXT:    add a2, a0, s6
-; NOREMAT-NEXT:    vle32.v v14, (a2)
-; NOREMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
-; NOREMAT-NEXT:    vle32.v v10, (a2)
-; NOREMAT-NEXT:    add a2, a0, t1
-; NOREMAT-NEXT:    vle32.v v12, (a2)
-; NOREMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
-; NOREMAT-NEXT:    vle32.v v8, (a2)
-; NOREMAT-NEXT:    addiw s5, t1, 512
-; NOREMAT-NEXT:    add a2, a0, s5
-; NOREMAT-NEXT:    vle32.v v14, (a2)
-; NOREMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
-; NOREMAT-NEXT:    vle32.v v10, (a2)
+; NOREMAT-NEXT:    add a3, a0, s7
+; NOREMAT-NEXT:    vle32.v v10, (a3)
+; NOREMAT-NEXT:    vle32.v v14, (a3)
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v16, v26
+; NOREMAT-NEXT:    addiw s6, t0, -512
+; NOREMAT-NEXT:    add a3, a0, s6
+; NOREMAT-NEXT:    vle32.v v0, (a3)
+; NOREMAT-NEXT:    vle32.v v16, (a3)
+; NOREMAT-NEXT:    csrr a3, vlenb
+; NOREMAT-NEXT:    slli a3, a3, 2
+; NOREMAT-NEXT:    add a3, sp, a3
+; NOREMAT-NEXT:    addi a3, a3, 288
+; NOREMAT-NEXT:    vl2r.v v24, (a3) # Unknown-size Folded Reload
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v24, v28
+; NOREMAT-NEXT:    add a3, a0, t0
+; NOREMAT-NEXT:    vle32.v v24, (a3)
+; NOREMAT-NEXT:    vle32.v v26, (a3)
+; NOREMAT-NEXT:    csrr a3, vlenb
+; NOREMAT-NEXT:    slli a3, a3, 2
+; NOREMAT-NEXT:    add a3, sp, a3
+; NOREMAT-NEXT:    addi a3, a3, 288
+; NOREMAT-NEXT:    vs2r.v v26, (a3) # Unknown-size Folded Spill
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v30, v2
+; NOREMAT-NEXT:    addiw s5, t0, 512
+; NOREMAT-NEXT:    add a3, a0, s5
+; NOREMAT-NEXT:    vle32.v v26, (a3)
+; NOREMAT-NEXT:    vle32.v v28, (a3)
+; NOREMAT-NEXT:    csrr a3, vlenb
+; NOREMAT-NEXT:    slli a3, a3, 1
+; NOREMAT-NEXT:    add a3, sp, a3
+; NOREMAT-NEXT:    addi a3, a3, 288
+; NOREMAT-NEXT:    vs2r.v v28, (a3) # Unknown-size Folded Spill
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v18, v4
 ; NOREMAT-NEXT:    slli s4, s3, 10
-; NOREMAT-NEXT:    add a2, a0, s4
-; NOREMAT-NEXT:    vle32.v v12, (a2)
-; NOREMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
-; NOREMAT-NEXT:    vle32.v v8, (a2)
-; NOREMAT-NEXT:    addiw s3, t1, 1536
-; NOREMAT-NEXT:    add a2, a0, s3
-; NOREMAT-NEXT:    vle32.v v14, (a2)
-; NOREMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
-; NOREMAT-NEXT:    vle32.v v10, (a2)
+; NOREMAT-NEXT:    add a3, a0, s4
+; NOREMAT-NEXT:    vle32.v v28, (a3)
+; NOREMAT-NEXT:    vle32.v v18, (a3)
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v20, v6
+; NOREMAT-NEXT:    addiw s3, t0, 1536
+; NOREMAT-NEXT:    add a3, a0, s3
+; NOREMAT-NEXT:    vle32.v v30, (a3)
+; NOREMAT-NEXT:    vle32.v v20, (a3)
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v12, v8
 ; NOREMAT-NEXT:    slli s2, s2, 11
-; NOREMAT-NEXT:    add a2, a0, s2
-; NOREMAT-NEXT:    vle32.v v12, (a2)
-; NOREMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
-; NOREMAT-NEXT:    vle32.v v8, (a2)
+; NOREMAT-NEXT:    add a3, a0, s2
+; NOREMAT-NEXT:    vle32.v v2, (a3)
+; NOREMAT-NEXT:    vle32.v v12, (a3)
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v22, v10
 ; NOREMAT-NEXT:    lui a3, 7
 ; NOREMAT-NEXT:    addiw s0, a3, -1536
-; NOREMAT-NEXT:    add a2, a0, s0
-; NOREMAT-NEXT:    vle32.v v14, (a2)
-; NOREMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
-; NOREMAT-NEXT:    vle32.v v10, (a2)
+; NOREMAT-NEXT:    add a4, a0, s0
+; NOREMAT-NEXT:    vle32.v v4, (a4)
+; NOREMAT-NEXT:    vle32.v v22, (a4)
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v14, v0
 ; NOREMAT-NEXT:    slli t6, t5, 10
-; NOREMAT-NEXT:    add a2, a0, t6
-; NOREMAT-NEXT:    vle32.v v12, (a2)
-; NOREMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
-; NOREMAT-NEXT:    vle32.v v8, (a2)
+; NOREMAT-NEXT:    add a4, a0, t6
+; NOREMAT-NEXT:    vle32.v v6, (a4)
+; NOREMAT-NEXT:    vle32.v v14, (a4)
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v16, v24
 ; NOREMAT-NEXT:    addiw t5, a3, -512
-; NOREMAT-NEXT:    add a2, a0, t5
-; NOREMAT-NEXT:    vle32.v v14, (a2)
-; NOREMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
-; NOREMAT-NEXT:    vle32.v v10, (a2)
-; NOREMAT-NEXT:    add a2, a0, a3
-; NOREMAT-NEXT:    vle32.v v12, (a2)
-; NOREMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
-; NOREMAT-NEXT:    vle32.v v8, (a2)
+; NOREMAT-NEXT:    add a4, a0, t5
+; NOREMAT-NEXT:    vle32.v v0, (a4)
+; NOREMAT-NEXT:    vle32.v v16, (a4)
+; NOREMAT-NEXT:    csrr a4, vlenb
+; NOREMAT-NEXT:    slli a4, a4, 2
+; NOREMAT-NEXT:    add a4, sp, a4
+; NOREMAT-NEXT:    addi a4, a4, 288
+; NOREMAT-NEXT:    vl2r.v v8, (a4) # Unknown-size Folded Reload
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v8, v26
+; NOREMAT-NEXT:    add a4, a0, a3
+; NOREMAT-NEXT:    vle32.v v26, (a4)
+; NOREMAT-NEXT:    vle32.v v8, (a4)
+; NOREMAT-NEXT:    csrr a4, vlenb
+; NOREMAT-NEXT:    slli a4, a4, 1
+; NOREMAT-NEXT:    add a4, sp, a4
+; NOREMAT-NEXT:    addi a4, a4, 288
+; NOREMAT-NEXT:    vl2r.v v10, (a4) # Unknown-size Folded Reload
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v10, v28
 ; NOREMAT-NEXT:    addiw t4, a3, 512
-; NOREMAT-NEXT:    add a2, a0, t4
-; NOREMAT-NEXT:    vle32.v v14, (a2)
-; NOREMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
-; NOREMAT-NEXT:    vle32.v v10, (a2)
+; NOREMAT-NEXT:    add a4, a0, t4
+; NOREMAT-NEXT:    vle32.v v10, (a4)
+; NOREMAT-NEXT:    vle32.v v24, (a4)
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v18, v30
 ; NOREMAT-NEXT:    slli t3, t2, 10
-; NOREMAT-NEXT:    add a2, a0, t3
-; NOREMAT-NEXT:    vle32.v v12, (a2)
-; NOREMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
-; NOREMAT-NEXT:    vle32.v v8, (a2)
+; NOREMAT-NEXT:    add a4, a0, t3
+; NOREMAT-NEXT:    vle32.v v18, (a4)
+; NOREMAT-NEXT:    vle32.v v28, (a4)
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v20, v2
 ; NOREMAT-NEXT:    addiw t2, a3, 1536
-; NOREMAT-NEXT:    add a2, a0, t2
-; NOREMAT-NEXT:    vle32.v v14, (a2)
-; NOREMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
-; NOREMAT-NEXT:    vle32.v v10, (a2)
-; NOREMAT-NEXT:    slli t0, t0, 11
-; NOREMAT-NEXT:    add a2, a0, t0
+; NOREMAT-NEXT:    add a4, a0, t2
+; NOREMAT-NEXT:    vle32.v v20, (a4)
+; NOREMAT-NEXT:    vle32.v v30, (a4)
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v12, v4
+; NOREMAT-NEXT:    slli t1, a2, 11
+; NOREMAT-NEXT:    add a2, a0, t1
 ; NOREMAT-NEXT:    vle32.v v12, (a2)
-; NOREMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
-; NOREMAT-NEXT:    vle32.v v8, (a2)
+; NOREMAT-NEXT:    vle32.v v2, (a2)
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v22, v6
 ; NOREMAT-NEXT:    lui a2, 8
 ; NOREMAT-NEXT:    addiw a7, a2, -1536
 ; NOREMAT-NEXT:    add a4, a0, a7
-; NOREMAT-NEXT:    vle32.v v14, (a4)
-; NOREMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
-; NOREMAT-NEXT:    vle32.v v10, (a4)
+; NOREMAT-NEXT:    vle32.v v22, (a4)
+; NOREMAT-NEXT:    vle32.v v4, (a4)
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v14, v0
 ; NOREMAT-NEXT:    slli a6, a5, 10
 ; NOREMAT-NEXT:    add a4, a0, a6
-; NOREMAT-NEXT:    vle32.v v12, (a4)
-; NOREMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
-; NOREMAT-NEXT:    vle32.v v8, (a4)
+; NOREMAT-NEXT:    vle32.v v14, (a4)
+; NOREMAT-NEXT:    vle32.v v0, (a4)
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v16, v26
 ; NOREMAT-NEXT:    addiw a5, a2, -512
 ; NOREMAT-NEXT:    add a4, a0, a5
-; NOREMAT-NEXT:    vle32.v v14, (a4)
-; NOREMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
-; NOREMAT-NEXT:    vle32.v v10, (a4)
+; NOREMAT-NEXT:    vle32.v v16, (a4)
+; NOREMAT-NEXT:    vle32.v v26, (a4)
 ; NOREMAT-NEXT:    add a0, a0, a2
-; NOREMAT-NEXT:    vle32.v v12, (a0)
-; NOREMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
-; NOREMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
+; NOREMAT-NEXT:    vle32.v v6, (a0)
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v8, v10
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v24, v18
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v28, v20
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v30, v12
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v2, v22
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v4, v14
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v0, v16
+; NOREMAT-NEXT:    sf.vc.vv 3, 0, v26, v6
 ; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; NOREMAT-NEXT:    addi a0, a1, 1024
 ; NOREMAT-NEXT:    vse32.v v8, (a0)
 ; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; NOREMAT-NEXT:    addi a0, a1, 1536
-; NOREMAT-NEXT:    vse32.v v10, (a0)
+; NOREMAT-NEXT:    vse32.v v8, (a0)
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
+; NOREMAT-NEXT:    ld a0, 272(sp) # 8-byte Folded Reload
+; NOREMAT-NEXT:    add a0, a1, a0
+; NOREMAT-NEXT:    vse32.v v8, (a0)
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
+; NOREMAT-NEXT:    ld a0, 264(sp) # 8-byte Folded Reload
+; NOREMAT-NEXT:    add a0, a1, a0
+; NOREMAT-NEXT:    vse32.v v8, (a0)
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; NOREMAT-NEXT:    ld a0, 256(sp) # 8-byte Folded Reload
 ; NOREMAT-NEXT:    add a0, a1, a0
-; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; NOREMAT-NEXT:    vse32.v v8, (a0)
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; NOREMAT-NEXT:    ld a0, 248(sp) # 8-byte Folded Reload
 ; NOREMAT-NEXT:    add a0, a1, a0
+; NOREMAT-NEXT:    vse32.v v8, (a0)
 ; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; NOREMAT-NEXT:    vse32.v v10, (a0)
-; NOREMAT-NEXT:    ld a0, 240(sp) # 8-byte Folded Reload
+; NOREMAT-NEXT:    lui a0, 1
 ; NOREMAT-NEXT:    add a0, a1, a0
-; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; NOREMAT-NEXT:    vse32.v v8, (a0)
-; NOREMAT-NEXT:    ld a0, 232(sp) # 8-byte Folded Reload
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
+; NOREMAT-NEXT:    ld a0, 240(sp) # 8-byte Folded Reload
 ; NOREMAT-NEXT:    add a0, a1, a0
+; NOREMAT-NEXT:    vse32.v v8, (a0)
 ; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; NOREMAT-NEXT:    vse32.v v10, (a0)
-; NOREMAT-NEXT:    lui a0, 1
+; NOREMAT-NEXT:    ld a0, 232(sp) # 8-byte Folded Reload
 ; NOREMAT-NEXT:    add a0, a1, a0
-; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; NOREMAT-NEXT:    vse32.v v8, (a0)
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; NOREMAT-NEXT:    ld a0, 224(sp) # 8-byte Folded Reload
 ; NOREMAT-NEXT:    add a0, a1, a0
+; NOREMAT-NEXT:    vse32.v v8, (a0)
 ; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; NOREMAT-NEXT:    vse32.v v10, (a0)
 ; NOREMAT-NEXT:    ld a0, 216(sp) # 8-byte Folded Reload
 ; NOREMAT-NEXT:    add a0, a1, a0
-; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; NOREMAT-NEXT:    vse32.v v8, (a0)
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; NOREMAT-NEXT:    ld a0, 208(sp) # 8-byte Folded Reload
 ; NOREMAT-NEXT:    add a0, a1, a0
+; NOREMAT-NEXT:    vse32.v v8, (a0)
 ; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; NOREMAT-NEXT:    vse32.v v10, (a0)
 ; NOREMAT-NEXT:    ld a0, 200(sp) # 8-byte Folded Reload
 ; NOREMAT-NEXT:    add a0, a1, a0
-; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; NOREMAT-NEXT:    vse32.v v8, (a0)
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; NOREMAT-NEXT:    ld a0, 192(sp) # 8-byte Folded Reload
 ; NOREMAT-NEXT:    add a0, a1, a0
+; NOREMAT-NEXT:    vse32.v v8, (a0)
 ; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; NOREMAT-NEXT:    vse32.v v10, (a0)
-; NOREMAT-NEXT:    ld a0, 184(sp) # 8-byte Folded Reload
+; NOREMAT-NEXT:    lui a0, 2
 ; NOREMAT-NEXT:    add a0, a1, a0
-; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; NOREMAT-NEXT:    vse32.v v8, (a0)
-; NOREMAT-NEXT:    ld a0, 176(sp) # 8-byte Folded Reload
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
+; NOREMAT-NEXT:    ld a0, 184(sp) # 8-byte Folded Reload
 ; NOREMAT-NEXT:    add a0, a1, a0
+; NOREMAT-NEXT:    vse32.v v8, (a0)
 ; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; NOREMAT-NEXT:    vse32.v v10, (a0)
-; NOREMAT-NEXT:    lui a0, 2
+; NOREMAT-NEXT:    ld a0, 176(sp) # 8-byte Folded Reload
 ; NOREMAT-NEXT:    add a0, a1, a0
-; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; NOREMAT-NEXT:    vse32.v v8, (a0)
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; NOREMAT-NEXT:    ld a0, 168(sp) # 8-byte Folded Reload
 ; NOREMAT-NEXT:    add a0, a1, a0
+; NOREMAT-NEXT:    vse32.v v8, (a0)
 ; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; NOREMAT-NEXT:    vse32.v v10, (a0)
 ; NOREMAT-NEXT:    ld a0, 160(sp) # 8-byte Folded Reload
 ; NOREMAT-NEXT:    add a0, a1, a0
-; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; NOREMAT-NEXT:    vse32.v v8, (a0)
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; NOREMAT-NEXT:    ld a0, 152(sp) # 8-byte Folded Reload
 ; NOREMAT-NEXT:    add a0, a1, a0
+; NOREMAT-NEXT:    vse32.v v8, (a0)
 ; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; NOREMAT-NEXT:    vse32.v v10, (a0)
 ; NOREMAT-NEXT:    ld a0, 144(sp) # 8-byte Folded Reload
 ; NOREMAT-NEXT:    add a0, a1, a0
-; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; NOREMAT-NEXT:    vse32.v v8, (a0)
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; NOREMAT-NEXT:    ld a0, 136(sp) # 8-byte Folded Reload
 ; NOREMAT-NEXT:    add a0, a1, a0
+; NOREMAT-NEXT:    vse32.v v8, (a0)
 ; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; NOREMAT-NEXT:    vse32.v v10, (a0)
-; NOREMAT-NEXT:    ld a0, 128(sp) # 8-byte Folded Reload
+; NOREMAT-NEXT:    lui a0, 3
 ; NOREMAT-NEXT:    add a0, a1, a0
-; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; NOREMAT-NEXT:    vse32.v v8, (a0)
-; NOREMAT-NEXT:    ld a0, 120(sp) # 8-byte Folded Reload
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
+; NOREMAT-NEXT:    ld a0, 128(sp) # 8-byte Folded Reload
 ; NOREMAT-NEXT:    add a0, a1, a0
+; NOREMAT-NEXT:    vse32.v v8, (a0)
 ; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; NOREMAT-NEXT:    vse32.v v10, (a0)
-; NOREMAT-NEXT:    lui a0, 3
+; NOREMAT-NEXT:    ld a0, 120(sp) # 8-byte Folded Reload
 ; NOREMAT-NEXT:    add a0, a1, a0
-; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; NOREMAT-NEXT:    vse32.v v8, (a0)
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; NOREMAT-NEXT:    ld a0, 112(sp) # 8-byte Folded Reload
 ; NOREMAT-NEXT:    add a0, a1, a0
+; NOREMAT-NEXT:    vse32.v v8, (a0)
 ; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; NOREMAT-NEXT:    vse32.v v10, (a0)
 ; NOREMAT-NEXT:    ld a0, 104(sp) # 8-byte Folded Reload
 ; NOREMAT-NEXT:    add a0, a1, a0
-; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; NOREMAT-NEXT:    vse32.v v8, (a0)
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; NOREMAT-NEXT:    ld a0, 96(sp) # 8-byte Folded Reload
 ; NOREMAT-NEXT:    add a0, a1, a0
+; NOREMAT-NEXT:    vse32.v v8, (a0)
 ; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; NOREMAT-NEXT:    vse32.v v10, (a0)
 ; NOREMAT-NEXT:    ld a0, 88(sp) # 8-byte Folded Reload
 ; NOREMAT-NEXT:    add a0, a1, a0
-; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; NOREMAT-NEXT:    vse32.v v8, (a0)
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; NOREMAT-NEXT:    ld a0, 80(sp) # 8-byte Folded Reload
 ; NOREMAT-NEXT:    add a0, a1, a0
+; NOREMAT-NEXT:    vse32.v v8, (a0)
 ; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; NOREMAT-NEXT:    vse32.v v10, (a0)
-; NOREMAT-NEXT:    ld a0, 72(sp) # 8-byte Folded Reload
+; NOREMAT-NEXT:    lui a0, 4
 ; NOREMAT-NEXT:    add a0, a1, a0
-; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; NOREMAT-NEXT:    vse32.v v8, (a0)
-; NOREMAT-NEXT:    ld a0, 64(sp) # 8-byte Folded Reload
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
+; NOREMAT-NEXT:    ld a0, 72(sp) # 8-byte Folded Reload
 ; NOREMAT-NEXT:    add a0, a1, a0
+; NOREMAT-NEXT:    vse32.v v8, (a0)
 ; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; NOREMAT-NEXT:    vse32.v v10, (a0)
-; NOREMAT-NEXT:    lui a0, 4
+; NOREMAT-NEXT:    ld a0, 64(sp) # 8-byte Folded Reload
 ; NOREMAT-NEXT:    add a0, a1, a0
-; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; NOREMAT-NEXT:    vse32.v v8, (a0)
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; NOREMAT-NEXT:    ld a0, 56(sp) # 8-byte Folded Reload
 ; NOREMAT-NEXT:    add a0, a1, a0
+; NOREMAT-NEXT:    vse32.v v8, (a0)
 ; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; NOREMAT-NEXT:    vse32.v v10, (a0)
 ; NOREMAT-NEXT:    ld a0, 48(sp) # 8-byte Folded Reload
 ; NOREMAT-NEXT:    add a0, a1, a0
-; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; NOREMAT-NEXT:    vse32.v v8, (a0)
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; NOREMAT-NEXT:    ld a0, 40(sp) # 8-byte Folded Reload
 ; NOREMAT-NEXT:    add a0, a1, a0
+; NOREMAT-NEXT:    vse32.v v8, (a0)
 ; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; NOREMAT-NEXT:    vse32.v v10, (a0)
 ; NOREMAT-NEXT:    ld a0, 32(sp) # 8-byte Folded Reload
 ; NOREMAT-NEXT:    add a0, a1, a0
-; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; NOREMAT-NEXT:    vse32.v v8, (a0)
-; NOREMAT-NEXT:    ld a0, 24(sp) # 8-byte Folded Reload
-; NOREMAT-NEXT:    add a0, a1, a0
 ; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; NOREMAT-NEXT:    vse32.v v10, (a0)
-; NOREMAT-NEXT:    ld a0, 16(sp) # 8-byte Folded Reload
+; NOREMAT-NEXT:    ld a0, 24(sp) # 8-byte Folded Reload
 ; NOREMAT-NEXT:    add a0, a1, a0
-; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; NOREMAT-NEXT:    vse32.v v8, (a0)
-; NOREMAT-NEXT:    ld a0, 8(sp) # 8-byte Folded Reload
-; NOREMAT-NEXT:    add a0, a1, a0
 ; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; NOREMAT-NEXT:    vse32.v v10, (a0)
 ; NOREMAT-NEXT:    add s1, a1, s1
-; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; NOREMAT-NEXT:    vse32.v v8, (s1)
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; NOREMAT-NEXT:    add ra, a1, ra
+; NOREMAT-NEXT:    vse32.v v8, (ra)
 ; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; NOREMAT-NEXT:    vse32.v v10, (ra)
 ; NOREMAT-NEXT:    add s11, a1, s11
-; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; NOREMAT-NEXT:    vse32.v v8, (s11)
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; NOREMAT-NEXT:    add s10, a1, s10
+; NOREMAT-NEXT:    vse32.v v8, (s10)
 ; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; NOREMAT-NEXT:    vse32.v v10, (s10)
 ; NOREMAT-NEXT:    add s9, a1, s9
-; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; NOREMAT-NEXT:    vse32.v v8, (s9)
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; NOREMAT-NEXT:    add s8, a1, s8
+; NOREMAT-NEXT:    vse32.v v8, (s8)
 ; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; NOREMAT-NEXT:    vse32.v v10, (s8)
 ; NOREMAT-NEXT:    add s7, a1, s7
-; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; NOREMAT-NEXT:    vse32.v v8, (s7)
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; NOREMAT-NEXT:    add s6, a1, s6
+; NOREMAT-NEXT:    vse32.v v8, (s6)
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
+; NOREMAT-NEXT:    add t0, a1, t0
+; NOREMAT-NEXT:    vse32.v v8, (t0)
 ; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; NOREMAT-NEXT:    vse32.v v10, (s6)
-; NOREMAT-NEXT:    add t1, a1, t1
-; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
-; NOREMAT-NEXT:    vse32.v v8, (t1)
 ; NOREMAT-NEXT:    add s5, a1, s5
+; NOREMAT-NEXT:    vse32.v v8, (s5)
 ; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; NOREMAT-NEXT:    vse32.v v10, (s5)
 ; NOREMAT-NEXT:    add s4, a1, s4
-; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; NOREMAT-NEXT:    vse32.v v8, (s4)
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; NOREMAT-NEXT:    add s3, a1, s3
+; NOREMAT-NEXT:    vse32.v v8, (s3)
 ; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; NOREMAT-NEXT:    vse32.v v10, (s3)
 ; NOREMAT-NEXT:    add s2, a1, s2
-; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; NOREMAT-NEXT:    vse32.v v8, (s2)
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; NOREMAT-NEXT:    add s0, a1, s0
+; NOREMAT-NEXT:    vse32.v v8, (s0)
 ; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; NOREMAT-NEXT:    vse32.v v10, (s0)
 ; NOREMAT-NEXT:    add t6, a1, t6
-; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; NOREMAT-NEXT:    vse32.v v8, (t6)
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; NOREMAT-NEXT:    add t5, a1, t5
+; NOREMAT-NEXT:    vse32.v v8, (t5)
 ; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; NOREMAT-NEXT:    vse32.v v10, (t5)
 ; NOREMAT-NEXT:    add a3, a1, a3
-; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; NOREMAT-NEXT:    vse32.v v8, (a3)
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; NOREMAT-NEXT:    add t4, a1, t4
+; NOREMAT-NEXT:    vse32.v v8, (t4)
 ; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; NOREMAT-NEXT:    vse32.v v10, (t4)
 ; NOREMAT-NEXT:    add t3, a1, t3
-; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; NOREMAT-NEXT:    vse32.v v8, (t3)
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; NOREMAT-NEXT:    add t2, a1, t2
+; NOREMAT-NEXT:    vse32.v v8, (t2)
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
+; NOREMAT-NEXT:    add t1, a1, t1
+; NOREMAT-NEXT:    vse32.v v8, (t1)
 ; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; NOREMAT-NEXT:    vse32.v v10, (t2)
-; NOREMAT-NEXT:    add t0, a1, t0
-; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
-; NOREMAT-NEXT:    vse32.v v8, (t0)
 ; NOREMAT-NEXT:    add a7, a1, a7
+; NOREMAT-NEXT:    vse32.v v8, (a7)
 ; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; NOREMAT-NEXT:    vse32.v v10, (a7)
 ; NOREMAT-NEXT:    add a6, a1, a6
-; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; NOREMAT-NEXT:    vse32.v v8, (a6)
-; NOREMAT-NEXT:    add a5, a1, a5
 ; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; NOREMAT-NEXT:    vse32.v v10, (a5)
-; NOREMAT-NEXT:    add a0, a1, a2
+; NOREMAT-NEXT:    add a5, a1, a5
 ; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
-; NOREMAT-NEXT:    vse32.v v8, (a0)
-; NOREMAT-NEXT:    addiw a0, a2, 512
-; NOREMAT-NEXT:    add a0, a1, a0
+; NOREMAT-NEXT:    vse32.v v8, (a5)
+; NOREMAT-NEXT:    add a0, a1, a2
 ; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; NOREMAT-NEXT:    vse32.v v10, (a0)
-; NOREMAT-NEXT:    addiw a0, a2, 1024
+; NOREMAT-NEXT:    addiw a0, a2, 512
 ; NOREMAT-NEXT:    add a0, a1, a0
 ; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; NOREMAT-NEXT:    vse32.v v8, (a0)
-; NOREMAT-NEXT:    addiw a0, a2, 1536
+; NOREMAT-NEXT:    addiw a0, a2, 1024
 ; NOREMAT-NEXT:    add a0, a1, a0
-; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; NOREMAT-NEXT:    vse32.v v10, (a0)
-; NOREMAT-NEXT:    li a0, 17
-; NOREMAT-NEXT:    slli a0, a0, 11
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
+; NOREMAT-NEXT:    addiw a0, a2, 1536
 ; NOREMAT-NEXT:    add a0, a1, a0
 ; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; NOREMAT-NEXT:    vse32.v v8, (a0)
+; NOREMAT-NEXT:    li a0, 17
+; NOREMAT-NEXT:    slli a0, a0, 11
+; NOREMAT-NEXT:    add a0, a1, a0
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
+; NOREMAT-NEXT:    vse32.v v10, (a0)
 ; NOREMAT-NEXT:    lui a0, 9
 ; NOREMAT-NEXT:    addiw a2, a0, -1536
 ; NOREMAT-NEXT:    add a2, a1, a2
-; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; NOREMAT-NEXT:    vse32.v v10, (a2)
-; NOREMAT-NEXT:    addiw a2, a0, -1024
-; NOREMAT-NEXT:    add a2, a1, a2
 ; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; NOREMAT-NEXT:    vse32.v v8, (a2)
-; NOREMAT-NEXT:    addiw a2, a0, -512
+; NOREMAT-NEXT:    addiw a2, a0, -1024
 ; NOREMAT-NEXT:    add a2, a1, a2
 ; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; NOREMAT-NEXT:    vse32.v v10, (a2)
-; NOREMAT-NEXT:    add a2, a1, a0
+; NOREMAT-NEXT:    addiw a2, a0, -512
+; NOREMAT-NEXT:    add a2, a1, a2
 ; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; NOREMAT-NEXT:    vse32.v v8, (a2)
-; NOREMAT-NEXT:    addiw a2, a0, 512
-; NOREMAT-NEXT:    add a2, a1, a2
+; NOREMAT-NEXT:    add a2, a1, a0
 ; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; NOREMAT-NEXT:    vse32.v v10, (a2)
-; NOREMAT-NEXT:    addiw a2, a0, 1024
+; NOREMAT-NEXT:    addiw a2, a0, 512
 ; NOREMAT-NEXT:    add a2, a1, a2
 ; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; NOREMAT-NEXT:    vse32.v v8, (a2)
+; NOREMAT-NEXT:    addiw a2, a0, 1024
+; NOREMAT-NEXT:    add a2, a1, a2
+; NOREMAT-NEXT:    vse32.v v10, (a2)
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; NOREMAT-NEXT:    addiw a0, a0, 1536
 ; NOREMAT-NEXT:    add a0, a1, a0
-; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; NOREMAT-NEXT:    vse32.v v10, (a0)
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
+; NOREMAT-NEXT:    vse32.v v8, (a0)
 ; NOREMAT-NEXT:    li a0, 19
 ; NOREMAT-NEXT:    slli a0, a0, 11
 ; NOREMAT-NEXT:    add a0, a1, a0
-; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
-; NOREMAT-NEXT:    vse32.v v8, (a0)
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
+; NOREMAT-NEXT:    vse32.v v10, (a0)
 ; NOREMAT-NEXT:    lui a0, 10
 ; NOREMAT-NEXT:    addiw a2, a0, -1536
 ; NOREMAT-NEXT:    add a2, a1, a2
-; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; NOREMAT-NEXT:    vse32.v v10, (a2)
-; NOREMAT-NEXT:    addiw a2, a0, -1024
-; NOREMAT-NEXT:    add a2, a1, a2
 ; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; NOREMAT-NEXT:    vse32.v v8, (a2)
-; NOREMAT-NEXT:    addiw a2, a0, -512
+; NOREMAT-NEXT:    addiw a2, a0, -1024
 ; NOREMAT-NEXT:    add a2, a1, a2
 ; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; NOREMAT-NEXT:    vse32.v v10, (a2)
-; NOREMAT-NEXT:    add a2, a1, a0
+; NOREMAT-NEXT:    addiw a2, a0, -512
+; NOREMAT-NEXT:    add a2, a1, a2
 ; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; NOREMAT-NEXT:    vse32.v v8, (a2)
+; NOREMAT-NEXT:    add a2, a1, a0
+; NOREMAT-NEXT:    vse32.v v10, (a2)
 ; NOREMAT-NEXT:    addiw a0, a0, 512
 ; NOREMAT-NEXT:    add a0, a1, a0
 ; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; NOREMAT-NEXT:    vse32.v v10, (a0)
+; NOREMAT-NEXT:    vse32.v v8, (a0)
 ; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; NOREMAT-NEXT:    ld ra, 360(sp) # 8-byte Folded Reload
-; NOREMAT-NEXT:    ld s0, 352(sp) # 8-byte Folded Reload
-; NOREMAT-NEXT:    ld s1, 344(sp) # 8-byte Folded Reload
-; NOREMAT-NEXT:    ld s2, 336(sp) # 8-byte Folded Reload
-; NOREMAT-NEXT:    ld s3, 328(sp) # 8-byte Folded Reload
-; NOREMAT-NEXT:    ld s4, 320(sp) # 8-byte Folded Reload
-; NOREMAT-NEXT:    ld s5, 312(sp) # 8-byte Folded Reload
-; NOREMAT-NEXT:    ld s6, 304(sp) # 8-byte Folded Reload
-; NOREMAT-NEXT:    ld s7, 296(sp) # 8-byte Folded Reload
-; NOREMAT-NEXT:    ld s8, 288(sp) # 8-byte Folded Reload
-; NOREMAT-NEXT:    ld s9, 280(sp) # 8-byte Folded Reload
-; NOREMAT-NEXT:    ld s10, 272(sp) # 8-byte Folded Reload
-; NOREMAT-NEXT:    ld s11, 264(sp) # 8-byte Folded Reload
-; NOREMAT-NEXT:    addi sp, sp, 368
+; NOREMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
+; NOREMAT-NEXT:    csrr a0, vlenb
+; NOREMAT-NEXT:    li a1, 6
+; NOREMAT-NEXT:    mul a0, a0, a1
+; NOREMAT-NEXT:    add sp, sp, a0
+; NOREMAT-NEXT:    ld ra, 392(sp) # 8-byte Folded Reload
+; NOREMAT-NEXT:    ld s0, 384(sp) # 8-byte Folded Reload
+; NOREMAT-NEXT:    ld s1, 376(sp) # 8-byte Folded Reload
+; NOREMAT-NEXT:    ld s2, 368(sp) # 8-byte Folded Reload
+; NOREMAT-NEXT:    ld s3, 360(sp) # 8-byte Folded Reload
+; NOREMAT-NEXT:    ld s4, 352(sp) # 8-byte Folded Reload
+; NOREMAT-NEXT:    ld s5, 344(sp) # 8-byte Folded Reload
+; NOREMAT-NEXT:    ld s6, 336(sp) # 8-byte Folded Reload
+; NOREMAT-NEXT:    ld s7, 328(sp) # 8-byte Folded Reload
+; NOREMAT-NEXT:    ld s8, 320(sp) # 8-byte Folded Reload
+; NOREMAT-NEXT:    ld s9, 312(sp) # 8-byte Folded Reload
+; NOREMAT-NEXT:    ld s10, 304(sp) # 8-byte Folded Reload
+; NOREMAT-NEXT:    ld s11, 296(sp) # 8-byte Folded Reload
+; NOREMAT-NEXT:    addi sp, sp, 400
 ; NOREMAT-NEXT:    ret
 ;
 ; REMAT-LABEL: test:
@@ -864,512 +927,512 @@ define void @test(ptr %0, ptr %1, i64 %2) {
 ; REMAT-NEXT:    add a2, a0, a2
 ; REMAT-NEXT:    vle32.v v14, (a2)
 ; REMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
-; REMAT-NEXT:    vle32.v v10, (a2)
+; REMAT-NEXT:    vle32.v v12, (a2)
 ; REMAT-NEXT:    li a2, 11
 ; REMAT-NEXT:    slli a2, a2, 10
 ; REMAT-NEXT:    add a2, a0, a2
-; REMAT-NEXT:    vle32.v v12, (a2)
+; REMAT-NEXT:    vle32.v v16, (a2)
 ; REMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
-; REMAT-NEXT:    vle32.v v8, (a2)
+; REMAT-NEXT:    vle32.v v10, (a2)
 ; REMAT-NEXT:    li a2, 23
 ; REMAT-NEXT:    slli a2, a2, 9
 ; REMAT-NEXT:    add a2, a0, a2
-; REMAT-NEXT:    vle32.v v14, (a2)
-; REMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
-; REMAT-NEXT:    vle32.v v10, (a2)
+; REMAT-NEXT:    vle32.v v26, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v12, v16
+; REMAT-NEXT:    vle32.v v12, (a2)
 ; REMAT-NEXT:    lui a2, 3
 ; REMAT-NEXT:    add a2, a0, a2
-; REMAT-NEXT:    vle32.v v12, (a2)
-; REMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
-; REMAT-NEXT:    vle32.v v8, (a2)
+; REMAT-NEXT:    vle32.v v28, (a2)
+; REMAT-NEXT:    vle32.v v14, (a2)
 ; REMAT-NEXT:    li a2, 25
 ; REMAT-NEXT:    slli a2, a2, 9
 ; REMAT-NEXT:    add a2, a0, a2
-; REMAT-NEXT:    vle32.v v14, (a2)
-; REMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
-; REMAT-NEXT:    vle32.v v10, (a2)
+; REMAT-NEXT:    vle32.v v30, (a2)
+; REMAT-NEXT:    vle32.v v16, (a2)
 ; REMAT-NEXT:    li a2, 13
 ; REMAT-NEXT:    slli a2, a2, 10
 ; REMAT-NEXT:    add a2, a0, a2
-; REMAT-NEXT:    vle32.v v12, (a2)
-; REMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
-; REMAT-NEXT:    vle32.v v8, (a2)
+; REMAT-NEXT:    vle32.v v0, (a2)
+; REMAT-NEXT:    vle32.v v18, (a2)
 ; REMAT-NEXT:    li a2, 27
 ; REMAT-NEXT:    slli a2, a2, 9
 ; REMAT-NEXT:    add a2, a0, a2
-; REMAT-NEXT:    vle32.v v14, (a2)
-; REMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
-; REMAT-NEXT:    vle32.v v10, (a2)
+; REMAT-NEXT:    vle32.v v2, (a2)
+; REMAT-NEXT:    vle32.v v20, (a2)
 ; REMAT-NEXT:    li a2, 7
 ; REMAT-NEXT:    slli a2, a2, 11
 ; REMAT-NEXT:    add a2, a0, a2
-; REMAT-NEXT:    vle32.v v12, (a2)
-; REMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
-; REMAT-NEXT:    vle32.v v8, (a2)
+; REMAT-NEXT:    vle32.v v4, (a2)
+; REMAT-NEXT:    vle32.v v22, (a2)
 ; REMAT-NEXT:    li a2, 29
 ; REMAT-NEXT:    slli a2, a2, 9
 ; REMAT-NEXT:    add a2, a0, a2
-; REMAT-NEXT:    vle32.v v14, (a2)
-; REMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
-; REMAT-NEXT:    vle32.v v10, (a2)
+; REMAT-NEXT:    vle32.v v24, (a2)
+; REMAT-NEXT:    vle32.v v8, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v10, v26
 ; REMAT-NEXT:    li a2, 15
 ; REMAT-NEXT:    slli a2, a2, 10
 ; REMAT-NEXT:    add a2, a0, a2
-; REMAT-NEXT:    vle32.v v12, (a2)
-; REMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
-; REMAT-NEXT:    vle32.v v8, (a2)
+; REMAT-NEXT:    vle32.v v26, (a2)
+; REMAT-NEXT:    vle32.v v10, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v12, v28
 ; REMAT-NEXT:    li a2, 31
 ; REMAT-NEXT:    slli a2, a2, 9
 ; REMAT-NEXT:    add a2, a0, a2
-; REMAT-NEXT:    vle32.v v14, (a2)
-; REMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
-; REMAT-NEXT:    vle32.v v10, (a2)
+; REMAT-NEXT:    vle32.v v28, (a2)
+; REMAT-NEXT:    vle32.v v12, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v14, v30
 ; REMAT-NEXT:    lui a2, 4
 ; REMAT-NEXT:    add a2, a0, a2
-; REMAT-NEXT:    vle32.v v12, (a2)
-; REMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
-; REMAT-NEXT:    vle32.v v8, (a2)
+; REMAT-NEXT:    vle32.v v30, (a2)
+; REMAT-NEXT:    vle32.v v14, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v16, v0
 ; REMAT-NEXT:    lui a2, 4
 ; REMAT-NEXT:    addiw a2, a2, 512
 ; REMAT-NEXT:    add a2, a0, a2
-; REMAT-NEXT:    vle32.v v14, (a2)
-; REMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
-; REMAT-NEXT:    vle32.v v10, (a2)
+; REMAT-NEXT:    vle32.v v0, (a2)
+; REMAT-NEXT:    vle32.v v16, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v18, v2
 ; REMAT-NEXT:    li a2, 17
 ; REMAT-NEXT:    slli a2, a2, 10
 ; REMAT-NEXT:    add a2, a0, a2
-; REMAT-NEXT:    vle32.v v12, (a2)
-; REMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
-; REMAT-NEXT:    vle32.v v8, (a2)
+; REMAT-NEXT:    vle32.v v2, (a2)
+; REMAT-NEXT:    vle32.v v18, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v20, v4
 ; REMAT-NEXT:    lui a2, 4
 ; REMAT-NEXT:    addiw a2, a2, 1536
 ; REMAT-NEXT:    add a2, a0, a2
-; REMAT-NEXT:    vle32.v v14, (a2)
-; REMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
-; REMAT-NEXT:    vle32.v v10, (a2)
+; REMAT-NEXT:    vle32.v v4, (a2)
+; REMAT-NEXT:    vle32.v v20, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v22, v24
 ; REMAT-NEXT:    li a2, 9
 ; REMAT-NEXT:    slli a2, a2, 11
 ; REMAT-NEXT:    add a2, a0, a2
-; REMAT-NEXT:    vle32.v v12, (a2)
-; REMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
-; REMAT-NEXT:    vle32.v v8, (a2)
+; REMAT-NEXT:    vle32.v v24, (a2)
+; REMAT-NEXT:    vle32.v v22, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v8, v26
 ; REMAT-NEXT:    lui a2, 5
 ; REMAT-NEXT:    addiw a2, a2, -1536
 ; REMAT-NEXT:    add a2, a0, a2
-; REMAT-NEXT:    vle32.v v14, (a2)
-; REMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
-; REMAT-NEXT:    vle32.v v10, (a2)
+; REMAT-NEXT:    vle32.v v26, (a2)
+; REMAT-NEXT:    vle32.v v8, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v10, v28
 ; REMAT-NEXT:    li a2, 19
 ; REMAT-NEXT:    slli a2, a2, 10
 ; REMAT-NEXT:    add a2, a0, a2
-; REMAT-NEXT:    vle32.v v12, (a2)
-; REMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
-; REMAT-NEXT:    vle32.v v8, (a2)
+; REMAT-NEXT:    vle32.v v28, (a2)
+; REMAT-NEXT:    vle32.v v10, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v12, v30
 ; REMAT-NEXT:    lui ra, 5
 ; REMAT-NEXT:    addiw ra, ra, -512
 ; REMAT-NEXT:    add a2, a0, ra
-; REMAT-NEXT:    vle32.v v14, (a2)
-; REMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
-; REMAT-NEXT:    vle32.v v10, (a2)
+; REMAT-NEXT:    vle32.v v30, (a2)
+; REMAT-NEXT:    vle32.v v12, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v14, v0
 ; REMAT-NEXT:    lui s11, 5
 ; REMAT-NEXT:    add a2, a0, s11
-; REMAT-NEXT:    vle32.v v12, (a2)
-; REMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
-; REMAT-NEXT:    vle32.v v8, (a2)
+; REMAT-NEXT:    vle32.v v0, (a2)
+; REMAT-NEXT:    vle32.v v14, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v16, v2
 ; REMAT-NEXT:    lui s10, 5
 ; REMAT-NEXT:    addiw s10, s10, 512
 ; REMAT-NEXT:    add a2, a0, s10
-; REMAT-NEXT:    vle32.v v14, (a2)
-; REMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
-; REMAT-NEXT:    vle32.v v10, (a2)
+; REMAT-NEXT:    vle32.v v2, (a2)
+; REMAT-NEXT:    vle32.v v16, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v18, v4
 ; REMAT-NEXT:    li s9, 21
 ; REMAT-NEXT:    slli s9, s9, 10
 ; REMAT-NEXT:    add a2, a0, s9
-; REMAT-NEXT:    vle32.v v12, (a2)
-; REMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
-; REMAT-NEXT:    vle32.v v8, (a2)
+; REMAT-NEXT:    vle32.v v4, (a2)
+; REMAT-NEXT:    vle32.v v18, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v20, v24
 ; REMAT-NEXT:    lui s8, 5
 ; REMAT-NEXT:    addiw s8, s8, 1536
 ; REMAT-NEXT:    add a2, a0, s8
-; REMAT-NEXT:    vle32.v v14, (a2)
-; REMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
-; REMAT-NEXT:    vle32.v v10, (a2)
+; REMAT-NEXT:    vle32.v v24, (a2)
+; REMAT-NEXT:    vle32.v v20, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v22, v26
 ; REMAT-NEXT:    li s7, 11
 ; REMAT-NEXT:    slli s7, s7, 11
 ; REMAT-NEXT:    add a2, a0, s7
-; REMAT-NEXT:    vle32.v v12, (a2)
-; REMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
-; REMAT-NEXT:    vle32.v v8, (a2)
+; REMAT-NEXT:    vle32.v v26, (a2)
+; REMAT-NEXT:    vle32.v v22, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v8, v28
 ; REMAT-NEXT:    lui s6, 6
 ; REMAT-NEXT:    addiw s6, s6, -1536
 ; REMAT-NEXT:    add a2, a0, s6
-; REMAT-NEXT:    vle32.v v14, (a2)
-; REMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
-; REMAT-NEXT:    vle32.v v10, (a2)
+; REMAT-NEXT:    vle32.v v28, (a2)
+; REMAT-NEXT:    vle32.v v8, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v10, v30
 ; REMAT-NEXT:    li s5, 23
 ; REMAT-NEXT:    slli s5, s5, 10
 ; REMAT-NEXT:    add a2, a0, s5
-; REMAT-NEXT:    vle32.v v12, (a2)
-; REMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
-; REMAT-NEXT:    vle32.v v8, (a2)
+; REMAT-NEXT:    vle32.v v30, (a2)
+; REMAT-NEXT:    vle32.v v10, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v12, v0
 ; REMAT-NEXT:    lui s4, 6
 ; REMAT-NEXT:    addiw s4, s4, -512
 ; REMAT-NEXT:    add a2, a0, s4
-; REMAT-NEXT:    vle32.v v14, (a2)
-; REMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
-; REMAT-NEXT:    vle32.v v10, (a2)
+; REMAT-NEXT:    vle32.v v0, (a2)
+; REMAT-NEXT:    vle32.v v12, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v14, v2
 ; REMAT-NEXT:    lui s3, 6
 ; REMAT-NEXT:    add a2, a0, s3
-; REMAT-NEXT:    vle32.v v12, (a2)
-; REMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
-; REMAT-NEXT:    vle32.v v8, (a2)
+; REMAT-NEXT:    vle32.v v2, (a2)
+; REMAT-NEXT:    vle32.v v14, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v16, v4
 ; REMAT-NEXT:    lui s2, 6
 ; REMAT-NEXT:    addiw s2, s2, 512
 ; REMAT-NEXT:    add a2, a0, s2
-; REMAT-NEXT:    vle32.v v14, (a2)
-; REMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
-; REMAT-NEXT:    vle32.v v10, (a2)
+; REMAT-NEXT:    vle32.v v4, (a2)
+; REMAT-NEXT:    vle32.v v16, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v18, v24
 ; REMAT-NEXT:    li s1, 25
 ; REMAT-NEXT:    slli s1, s1, 10
 ; REMAT-NEXT:    add a2, a0, s1
-; REMAT-NEXT:    vle32.v v12, (a2)
-; REMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
-; REMAT-NEXT:    vle32.v v8, (a2)
+; REMAT-NEXT:    vle32.v v6, (a2)
+; REMAT-NEXT:    vle32.v v18, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v20, v26
 ; REMAT-NEXT:    lui s0, 6
 ; REMAT-NEXT:    addiw s0, s0, 1536
 ; REMAT-NEXT:    add a2, a0, s0
-; REMAT-NEXT:    vle32.v v14, (a2)
-; REMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
-; REMAT-NEXT:    vle32.v v10, (a2)
+; REMAT-NEXT:    vle32.v v26, (a2)
+; REMAT-NEXT:    vle32.v v20, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v22, v28
 ; REMAT-NEXT:    li t6, 13
 ; REMAT-NEXT:    slli t6, t6, 11
 ; REMAT-NEXT:    add a2, a0, t6
-; REMAT-NEXT:    vle32.v v12, (a2)
-; REMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
-; REMAT-NEXT:    vle32.v v8, (a2)
+; REMAT-NEXT:    vle32.v v28, (a2)
+; REMAT-NEXT:    vle32.v v22, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v8, v30
 ; REMAT-NEXT:    lui t5, 7
 ; REMAT-NEXT:    addiw t5, t5, -1536
 ; REMAT-NEXT:    add a2, a0, t5
-; REMAT-NEXT:    vle32.v v14, (a2)
-; REMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
-; REMAT-NEXT:    vle32.v v10, (a2)
+; REMAT-NEXT:    vle32.v v30, (a2)
+; REMAT-NEXT:    vle32.v v24, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v10, v0
 ; REMAT-NEXT:    li t4, 27
 ; REMAT-NEXT:    slli t4, t4, 10
 ; REMAT-NEXT:    add a2, a0, t4
-; REMAT-NEXT:    vle32.v v12, (a2)
-; REMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
-; REMAT-NEXT:    vle32.v v8, (a2)
+; REMAT-NEXT:    vle32.v v0, (a2)
+; REMAT-NEXT:    vle32.v v10, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v12, v2
 ; REMAT-NEXT:    lui t3, 7
 ; REMAT-NEXT:    addiw t3, t3, -512
 ; REMAT-NEXT:    add a2, a0, t3
-; REMAT-NEXT:    vle32.v v14, (a2)
-; REMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
-; REMAT-NEXT:    vle32.v v10, (a2)
+; REMAT-NEXT:    vle32.v v2, (a2)
+; REMAT-NEXT:    vle32.v v12, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v14, v4
 ; REMAT-NEXT:    lui t2, 7
 ; REMAT-NEXT:    add a2, a0, t2
-; REMAT-NEXT:    vle32.v v12, (a2)
-; REMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
+; REMAT-NEXT:    vle32.v v4, (a2)
 ; REMAT-NEXT:    vle32.v v8, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v16, v6
 ; REMAT-NEXT:    lui t1, 7
 ; REMAT-NEXT:    addiw t1, t1, 512
 ; REMAT-NEXT:    add a2, a0, t1
 ; REMAT-NEXT:    vle32.v v14, (a2)
-; REMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
-; REMAT-NEXT:    vle32.v v10, (a2)
+; REMAT-NEXT:    vle32.v v16, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v18, v26
 ; REMAT-NEXT:    li t0, 29
 ; REMAT-NEXT:    slli t0, t0, 10
 ; REMAT-NEXT:    add a2, a0, t0
-; REMAT-NEXT:    vle32.v v12, (a2)
-; REMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
-; REMAT-NEXT:    vle32.v v8, (a2)
+; REMAT-NEXT:    vle32.v v18, (a2)
+; REMAT-NEXT:    vle32.v v26, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v20, v28
 ; REMAT-NEXT:    lui a7, 7
 ; REMAT-NEXT:    addiw a7, a7, 1536
 ; REMAT-NEXT:    add a2, a0, a7
-; REMAT-NEXT:    vle32.v v14, (a2)
-; REMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
-; REMAT-NEXT:    vle32.v v10, (a2)
+; REMAT-NEXT:    vle32.v v20, (a2)
+; REMAT-NEXT:    vle32.v v28, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v22, v30
 ; REMAT-NEXT:    li a6, 15
 ; REMAT-NEXT:    slli a6, a6, 11
 ; REMAT-NEXT:    add a2, a0, a6
-; REMAT-NEXT:    vle32.v v12, (a2)
-; REMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
-; REMAT-NEXT:    vle32.v v8, (a2)
+; REMAT-NEXT:    vle32.v v22, (a2)
+; REMAT-NEXT:    vle32.v v30, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v24, v0
 ; REMAT-NEXT:    lui a5, 8
 ; REMAT-NEXT:    addiw a5, a5, -1536
 ; REMAT-NEXT:    add a2, a0, a5
-; REMAT-NEXT:    vle32.v v14, (a2)
-; REMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
-; REMAT-NEXT:    vle32.v v10, (a2)
+; REMAT-NEXT:    vle32.v v24, (a2)
+; REMAT-NEXT:    vle32.v v0, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v10, v2
 ; REMAT-NEXT:    li a4, 31
 ; REMAT-NEXT:    slli a4, a4, 10
 ; REMAT-NEXT:    add a2, a0, a4
-; REMAT-NEXT:    vle32.v v12, (a2)
-; REMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
-; REMAT-NEXT:    vle32.v v8, (a2)
+; REMAT-NEXT:    vle32.v v10, (a2)
+; REMAT-NEXT:    vle32.v v2, (a2)
+; REMAT-NEXT:    sf.vc.vv 3, 0, v12, v4
 ; REMAT-NEXT:    lui a3, 8
 ; REMAT-NEXT:    addiw a3, a3, -512
 ; REMAT-NEXT:    add a2, a0, a3
-; REMAT-NEXT:    vle32.v v14, (a2)
-; REMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
-; REMAT-NEXT:    vle32.v v10, (a2)
+; REMAT-NEXT:    vle32.v v12, (a2)
+; REMAT-NEXT:    vle32.v v4, (a2)
 ; REMAT-NEXT:    lui a2, 8
 ; REMAT-NEXT:    add a0, a0, a2
-; REMAT-NEXT:    vle32.v v12, (a0)
+; REMAT-NEXT:    vle32.v v6, (a0)
 ; REMAT-NEXT:    sf.vc.vv 3, 0, v8, v14
-; REMAT-NEXT:    sf.vc.vv 3, 0, v10, v12
+; REMAT-NEXT:    sf.vc.vv 3, 0, v16, v18
+; REMAT-NEXT:    sf.vc.vv 3, 0, v26, v20
+; REMAT-NEXT:    sf.vc.vv 3, 0, v28, v22
+; REMAT-NEXT:    sf.vc.vv 3, 0, v30, v24
+; REMAT-NEXT:    sf.vc.vv 3, 0, v0, v10
+; REMAT-NEXT:    sf.vc.vv 3, 0, v2, v12
+; REMAT-NEXT:    sf.vc.vv 3, 0, v4, v6
 ; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; REMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; REMAT-NEXT:    addi a0, a1, 1024
 ; REMAT-NEXT:    vse32.v v8, (a0)
 ; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; REMAT-NEXT:    addi a0, a1, 1536
-; REMAT-NEXT:    vse32.v v10, (a0)
+; REMAT-NEXT:    vse32.v v8, (a0)
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; REMAT-NEXT:    li a0, 1
 ; REMAT-NEXT:    slli a0, a0, 11
 ; REMAT-NEXT:    add a0, a1, a0
-; REMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; REMAT-NEXT:    vse32.v v8, (a0)
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; REMAT-NEXT:    li a0, 5
 ; REMAT-NEXT:    slli a0, a0, 9
 ; REMAT-NEXT:    add a0, a1, a0
+; REMAT-NEXT:    vse32.v v8, (a0)
 ; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; REMAT-NEXT:    vse32.v v10, (a0)
 ; REMAT-NEXT:    li a0, 3
 ; REMAT-NEXT:    slli a0, a0, 10
 ; REMAT-NEXT:    add a0, a1, a0
-; REMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; REMAT-NEXT:    vse32.v v8, (a0)
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; REMAT-NEXT:    li a0, 7
 ; REMAT-NEXT:    slli a0, a0, 9
 ; REMAT-NEXT:    add a0, a1, a0
+; REMAT-NEXT:    vse32.v v8, (a0)
 ; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; REMAT-NEXT:    vse32.v v10, (a0)
 ; REMAT-NEXT:    lui a0, 1
 ; REMAT-NEXT:    add a0, a1, a0
-; REMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; REMAT-NEXT:    vse32.v v8, (a0)
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; REMAT-NEXT:    li a0, 9
 ; REMAT-NEXT:    slli a0, a0, 9
 ; REMAT-NEXT:    add a0, a1, a0
+; REMAT-NEXT:    vse32.v v8, (a0)
 ; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; REMAT-NEXT:    vse32.v v10, (a0)
 ; REMAT-NEXT:    li a0, 5
 ; REMAT-NEXT:    slli a0, a0, 10
 ; REMAT-NEXT:    add a0, a1, a0
-; REMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; REMAT-NEXT:    vse32.v v8, (a0)
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; REMAT-NEXT:    li a0, 11
 ; REMAT-NEXT:    slli a0, a0, 9
 ; REMAT-NEXT:    add a0, a1, a0
+; REMAT-NEXT:    vse32.v v8, (a0)
 ; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; REMAT-NEXT:    vse32.v v10, (a0)
 ; REMAT-NEXT:    li a0, 3
 ; REMAT-NEXT:    slli a0, a0, 11
 ; REMAT-NEXT:    add a0, a1, a0
-; REMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; REMAT-NEXT:    vse32.v v8, (a0)
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; REMAT-NEXT:    li a0, 13
 ; REMAT-NEXT:    slli a0, a0, 9
 ; REMAT-NEXT:    add a0, a1, a0
+; REMAT-NEXT:    vse32.v v8, (a0)
 ; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; REMAT-NEXT:    vse32.v v10, (a0)
 ; REMAT-NEXT:    li a0, 7
 ; REMAT-NEXT:    slli a0, a0, 10
 ; REMAT-NEXT:    add a0, a1, a0
-; REMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; REMAT-NEXT:    vse32.v v8, (a0)
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; REMAT-NEXT:    li a0, 15
 ; REMAT-NEXT:    slli a0, a0, 9
 ; REMAT-NEXT:    add a0, a1, a0
+; REMAT-NEXT:    vse32.v v8, (a0)
 ; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; REMAT-NEXT:    vse32.v v10, (a0)
 ; REMAT-NEXT:    lui a0, 2
 ; REMAT-NEXT:    add a0, a1, a0
-; REMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; REMAT-NEXT:    vse32.v v8, (a0)
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; REMAT-NEXT:    li a0, 17
 ; REMAT-NEXT:    slli a0, a0, 9
 ; REMAT-NEXT:    add a0, a1, a0
+; REMAT-NEXT:    vse32.v v8, (a0)
 ; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; REMAT-NEXT:    vse32.v v10, (a0)
 ; REMAT-NEXT:    li a0, 9
 ; REMAT-NEXT:    slli a0, a0, 10
 ; REMAT-NEXT:    add a0, a1, a0
-; REMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; REMAT-NEXT:    vse32.v v8, (a0)
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; REMAT-NEXT:    li a0, 19
 ; REMAT-NEXT:    slli a0, a0, 9
 ; REMAT-NEXT:    add a0, a1, a0
+; REMAT-NEXT:    vse32.v v8, (a0)
 ; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; REMAT-NEXT:    vse32.v v10, (a0)
 ; REMAT-NEXT:    li a0, 5
 ; REMAT-NEXT:    slli a0, a0, 11
 ; REMAT-NEXT:    add a0, a1, a0
-; REMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; REMAT-NEXT:    vse32.v v8, (a0)
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; REMAT-NEXT:    li a0, 21
 ; REMAT-NEXT:    slli a0, a0, 9
 ; REMAT-NEXT:    add a0, a1, a0
+; REMAT-NEXT:    vse32.v v8, (a0)
 ; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; REMAT-NEXT:    vse32.v v10, (a0)
 ; REMAT-NEXT:    li a0, 11
 ; REMAT-NEXT:    slli a0, a0, 10
 ; REMAT-NEXT:    add a0, a1, a0
-; REMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; REMAT-NEXT:    vse32.v v8, (a0)
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; REMAT-NEXT:    li a0, 23
 ; REMAT-NEXT:    slli a0, a0, 9
 ; REMAT-NEXT:    add a0, a1, a0
+; REMAT-NEXT:    vse32.v v8, (a0)
 ; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; REMAT-NEXT:    vse32.v v10, (a0)
 ; REMAT-NEXT:    lui a0, 3
 ; REMAT-NEXT:    add a0, a1, a0
-; REMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; REMAT-NEXT:    vse32.v v8, (a0)
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; REMAT-NEXT:    li a0, 25
 ; REMAT-NEXT:    slli a0, a0, 9
 ; REMAT-NEXT:    add a0, a1, a0
+; REMAT-NEXT:    vse32.v v8, (a0)
 ; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; REMAT-NEXT:    vse32.v v10, (a0)
 ; REMAT-NEXT:    li a0, 13
 ; REMAT-NEXT:    slli a0, a0, 10
 ; REMAT-NEXT:    add a0, a1, a0
-; REMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; REMAT-NEXT:    vse32.v v8, (a0)
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; REMAT-NEXT:    li a0, 27
 ; REMAT-NEXT:    slli a0, a0, 9
 ; REMAT-NEXT:    add a0, a1, a0
+; REMAT-NEXT:    vse32.v v8, (a0)
 ; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; REMAT-NEXT:    vse32.v v10, (a0)
 ; REMAT-NEXT:    li a0, 7
 ; REMAT-NEXT:    slli a0, a0, 11
 ; REMAT-NEXT:    add a0, a1, a0
-; REMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; REMAT-NEXT:    vse32.v v8, (a0)
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; REMAT-NEXT:    li a0, 29
 ; REMAT-NEXT:    slli a0, a0, 9
 ; REMAT-NEXT:    add a0, a1, a0
+; REMAT-NEXT:    vse32.v v8, (a0)
 ; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; REMAT-NEXT:    vse32.v v10, (a0)
 ; REMAT-NEXT:    li a0, 15
 ; REMAT-NEXT:    slli a0, a0, 10
 ; REMAT-NEXT:    add a0, a1, a0
-; REMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; REMAT-NEXT:    vse32.v v8, (a0)
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; REMAT-NEXT:    li a0, 31
 ; REMAT-NEXT:    slli a0, a0, 9
 ; REMAT-NEXT:    add a0, a1, a0
+; REMAT-NEXT:    vse32.v v8, (a0)
 ; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; REMAT-NEXT:    vse32.v v10, (a0)
 ; REMAT-NEXT:    lui a0, 4
 ; REMAT-NEXT:    add a0, a1, a0
-; REMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; REMAT-NEXT:    vse32.v v8, (a0)
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; REMAT-NEXT:    lui a0, 4
 ; REMAT-NEXT:    addiw a0, a0, 512
 ; REMAT-NEXT:    add a0, a1, a0
+; REMAT-NEXT:    vse32.v v8, (a0)
 ; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; REMAT-NEXT:    vse32.v v10, (a0)
 ; REMAT-NEXT:    li a0, 17
 ; REMAT-NEXT:    slli a0, a0, 10
 ; REMAT-NEXT:    add a0, a1, a0
-; REMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; REMAT-NEXT:    vse32.v v8, (a0)
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; REMAT-NEXT:    lui a0, 4
 ; REMAT-NEXT:    addiw a0, a0, 1536
 ; REMAT-NEXT:    add a0, a1, a0
+; REMAT-NEXT:    vse32.v v8, (a0)
 ; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; REMAT-NEXT:    vse32.v v10, (a0)
 ; REMAT-NEXT:    li a0, 9
 ; REMAT-NEXT:    slli a0, a0, 11
 ; REMAT-NEXT:    add a0, a1, a0
-; REMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; REMAT-NEXT:    vse32.v v8, (a0)
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; REMAT-NEXT:    lui a0, 5
 ; REMAT-NEXT:    addiw a0, a0, -1536
 ; REMAT-NEXT:    add a0, a1, a0
+; REMAT-NEXT:    vse32.v v8, (a0)
 ; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; REMAT-NEXT:    vse32.v v10, (a0)
 ; REMAT-NEXT:    li a0, 19
 ; REMAT-NEXT:    slli a0, a0, 10
 ; REMAT-NEXT:    add a0, a1, a0
-; REMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; REMAT-NEXT:    vse32.v v8, (a0)
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; REMAT-NEXT:    add ra, a1, ra
+; REMAT-NEXT:    vse32.v v8, (ra)
 ; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; REMAT-NEXT:    vse32.v v10, (ra)
 ; REMAT-NEXT:    add s11, a1, s11
-; REMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; REMAT-NEXT:    vse32.v v8, (s11)
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; REMAT-NEXT:    add s10, a1, s10
+; REMAT-NEXT:    vse32.v v8, (s10)
 ; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; REMAT-NEXT:    vse32.v v10, (s10)
 ; REMAT-NEXT:    add s9, a1, s9
-; REMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; REMAT-NEXT:    vse32.v v8, (s9)
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; REMAT-NEXT:    add s8, a1, s8
+; REMAT-NEXT:    vse32.v v8, (s8)
 ; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; REMAT-NEXT:    vse32.v v10, (s8)
 ; REMAT-NEXT:    add s7, a1, s7
-; REMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; REMAT-NEXT:    vse32.v v8, (s7)
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; REMAT-NEXT:    add s6, a1, s6
+; REMAT-NEXT:    vse32.v v8, (s6)
 ; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; REMAT-NEXT:    vse32.v v10, (s6)
 ; REMAT-NEXT:    add s5, a1, s5
-; REMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; REMAT-NEXT:    vse32.v v8, (s5)
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; REMAT-NEXT:    add s4, a1, s4
+; REMAT-NEXT:    vse32.v v8, (s4)
 ; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; REMAT-NEXT:    vse32.v v10, (s4)
 ; REMAT-NEXT:    add s3, a1, s3
-; REMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; REMAT-NEXT:    vse32.v v8, (s3)
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; REMAT-NEXT:    add s2, a1, s2
+; REMAT-NEXT:    vse32.v v8, (s2)
 ; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; REMAT-NEXT:    vse32.v v10, (s2)
 ; REMAT-NEXT:    add s1, a1, s1
-; REMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; REMAT-NEXT:    vse32.v v8, (s1)
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; REMAT-NEXT:    add s0, a1, s0
+; REMAT-NEXT:    vse32.v v8, (s0)
 ; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; REMAT-NEXT:    vse32.v v10, (s0)
 ; REMAT-NEXT:    add t6, a1, t6
-; REMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; REMAT-NEXT:    vse32.v v8, (t6)
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; REMAT-NEXT:    add t5, a1, t5
+; REMAT-NEXT:    vse32.v v8, (t5)
 ; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; REMAT-NEXT:    vse32.v v10, (t5)
 ; REMAT-NEXT:    add t4, a1, t4
-; REMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; REMAT-NEXT:    vse32.v v8, (t4)
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; REMAT-NEXT:    add t3, a1, t3
+; REMAT-NEXT:    vse32.v v8, (t3)
 ; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; REMAT-NEXT:    vse32.v v10, (t3)
 ; REMAT-NEXT:    add t2, a1, t2
-; REMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; REMAT-NEXT:    vse32.v v8, (t2)
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; REMAT-NEXT:    add t1, a1, t1
+; REMAT-NEXT:    vse32.v v8, (t1)
 ; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; REMAT-NEXT:    vse32.v v10, (t1)
 ; REMAT-NEXT:    add t0, a1, t0
-; REMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; REMAT-NEXT:    vse32.v v8, (t0)
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; REMAT-NEXT:    add a7, a1, a7
+; REMAT-NEXT:    vse32.v v8, (a7)
 ; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; REMAT-NEXT:    vse32.v v10, (a7)
 ; REMAT-NEXT:    add a6, a1, a6
-; REMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; REMAT-NEXT:    vse32.v v8, (a6)
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; REMAT-NEXT:    add a5, a1, a5
+; REMAT-NEXT:    vse32.v v8, (a5)
 ; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; REMAT-NEXT:    vse32.v v10, (a5)
 ; REMAT-NEXT:    add a4, a1, a4
-; REMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; REMAT-NEXT:    vse32.v v8, (a4)
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; REMAT-NEXT:    add a3, a1, a3
+; REMAT-NEXT:    vse32.v v8, (a3)
 ; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; REMAT-NEXT:    vse32.v v10, (a3)
 ; REMAT-NEXT:    add a2, a1, a2
 ; REMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; REMAT-NEXT:    vse32.v v8, (a2)
@@ -1449,13 +1512,13 @@ define void @test(ptr %0, ptr %1, i64 %2) {
 ; REMAT-NEXT:    vse32.v v10, (a0)
 ; REMAT-NEXT:    lui a0, 10
 ; REMAT-NEXT:    add a0, a1, a0
-; REMAT-NEXT:    sf.vc.v.i 2, 0, v10, 0
 ; REMAT-NEXT:    vse32.v v8, (a0)
 ; REMAT-NEXT:    lui a0, 10
 ; REMAT-NEXT:    addiw a0, a0, 512
 ; REMAT-NEXT:    add a0, a1, a0
 ; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
-; REMAT-NEXT:    vse32.v v10, (a0)
+; REMAT-NEXT:    vse32.v v8, (a0)
+; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; REMAT-NEXT:    sf.vc.v.i 2, 0, v8, 0
 ; REMAT-NEXT:    ld ra, 104(sp) # 8-byte Folded Reload
 ; REMAT-NEXT:    ld s0, 96(sp) # 8-byte Folded Reload



More information about the llvm-commits mailing list